/*	
	Purpose: This file takes the appended surveys
				from 1_Append_datasets and merges in 
				many variations of Census predicted 
				parental income. Census-adjusted weights 
				are made and are used to create 
				ranked measures of parental income. 

	Creates: 2_PooledData_analysis.dta
*/

* Start from a clean session
clear all
* Do not pause long output for --more-- prompts
set more off
* Raise the variable limit to 10,000; many income-score variables are merged in below
set maxvar 10000

* Work from the project root. The global Mydirectory1 is not set in the visible part of this file, so it has to be defined before this file runs.
cd "$Mydirectory1/"

* Load the pooled survey data (per the file header, the output of 1_Append_datasets)
use ./3_Output/1_PooledData.dta, clear

* Complete the value label sex_l: code 2 is Female
	label define sex_l 2 "Female", modify
	tabulate sex, missing

* Remove respondents with no decade, and the two edge decades (1900, 1980) that have small N
	drop if inlist(decade, 1900, 1980) | decade == .

* Remove respondents with missing race or sex (first show which surveys lack race)
	tabulate data if race == .
	drop if sex == . | race == .

* One indicator variable per decade (decade_1, decade_2, and so on)
	tabulate decade, generate(decade_)

* Survey lists used by the loops below
	/* datasets         : every survey in the pooled file
	   datasets_minus_2 : the same list without anes and gss */
	global datasets "anes gss avtmh57 avtmh76 nsfh nlsom nlsmw nsba nlsy79 nlsyw68 nlsym66 psid1997 psid2017 nfs ocg73 ocg62"
	global datasets_minus_2 " avtmh57 avtmh76 nsfh nlsom nlsmw nsba nlsy79 nlsyw68 nlsym66 psid1997 psid2017 nfs ocg73 ocg62"

* Five-category father education, used as a merge key for the income scores by education
	/* Mapping from edu_dad:
	     <=1        -> 1   (the Census coding has no 0 category)
	     2, 3, 4    -> unchanged
	     >=5        -> 5   (system missing excluded)
	     otherwise  -> missing */
	generate edu = cond(edu_dad <= 1, 1, ///
		cond(inlist(edu_dad, 2, 3, 4), edu_dad, ///
		cond(edu_dad >= 5 & edu_dad != ., 5, .)))
	tabulate edu, missing
	label variable edu "edu of dad (as coded in income scores files)"
	
*------------------------------------------------------------------------------*
*------------------------------------------------------------------------------*

***********************
* SURVEY WEIGHTS
***********************
	
* Report, survey by survey, how many respondents have a zero or missing survey-provided weight
	foreach svy of global datasets {
		display "`svy'"
		count if data == "`svy'" & (weight_`svy' == . | weight_`svy' == 0)
	}

* Remove those respondents
	// Original note: a few 0s in the NLS surveys in OCG 73
	foreach svy of global datasets {
		drop if data == "`svy'" & (weight_`svy' == . | weight_`svy' == 0)
	}


* Summarize each survey weight to check whether its mean is 1
	// Original note: this is NOT the case, hence the centering that follows
	foreach svy of global datasets {
		display "`svy'"
		summarize weight_`svy' if data == "`svy'"
	}
	

/* ANES: rescale the weight to mean 1 within each of the years listed in
   the loop. ANES respondents in any other year keep the starting value 1. */
	bysort year: tabulate weight_anes if data == "anes"

	generate weight_center_anes = 1 if data == "anes"
	foreach yr of numlist 1958 1960 1964 1968 1970 {
		// Mean ANES weight in this year
		summarize weight_anes if data == "anes" & year == `yr'
		local weight_avg = r(mean)

		// Divide by that mean, then display the result as a check
		replace weight_center_anes = weight_anes / `weight_avg' if data == "anes" & year == `yr'
		summarize weight_center_anes if year == `yr'
	}
	label variable weight_center_anes "Weight anes, centered"

/* GSS: rescale the weight to mean 1 within every survey year
   that appears in the GSS data. */
	generate weight_center_gss = .
	levelsof year if data == "gss", local(years)
	foreach yr of local years {

		// Mean GSS weight in this year
		summarize weight_gss if data == "gss" & year == `yr'
		local weight_avg = r(mean)

		// Divide by that mean, then display the result as a check
		replace weight_center_gss = weight_gss / `weight_avg' if data == "gss" & year == `yr'
		summarize weight_center_gss if year == `yr'
	}
	label variable weight_center_gss "Weight gss, centered"
	
* Center the weight of every survey other than ANES and GSS
	/* The list datasets_minus_2 leaves out only anes and gss, so the PSID
	   samples ARE processed here. The mean is taken over the whole survey,
	   not year by year. */
	foreach svy of global datasets_minus_2 {

		summarize weight_`svy' if data == "`svy'"
		local weight_avg = r(mean)

		generate weight_center_`svy' = weight_`svy' / `weight_avg' if data == "`svy'"
		label variable weight_center_`svy' "Weight `svy', centered"
	}


* Collapse the survey-specific centered weights into one variable
	generate weight_center = .
	foreach svy of global datasets {
		replace weight_center = weight_center_`svy' if data == "`svy'"
	}
	label variable weight_center "Weight, centered"

	// Count respondents left without a usable weight
	count if weight_center == . | weight_center == 0

* Remove all raw and survey-specific weights
	/* weight_center itself matches the pattern weight_*, so it is parked
	   under another name while the pattern is dropped. */
	rename weight_center test1
	drop weight_*
	rename test1 weight_center
	
*------------------------------------------------------------------------------*
*------------------------------------------------------------------------------*

***********************
** ID VARIABLE
***********************

* Create 1 harmonized WITHIN SURVEY ID 
	/* Copy each survey's own ID (id_<survey>) into a single variable.
	   Stored as double: the default float type represents integers exactly
	   only up to 16,777,216, so larger IDs would be rounded and distinct
	   respondents could end up sharing an ID in the grouping step below. */
	gen double temp1=.
	foreach i in $datasets {
		replace temp1 = id_`i' if data=="`i'"
	}
	// Every respondent must have received an ID from its own survey
	assert temp1!=.
	drop id_*
	rename temp1 id_within_survey
	label var id_within_survey "ID within survey"
	
* Create 1 harmonized ACROSS SURVEY ID 
	// One running number per (survey, within-survey ID) pair
	egen id_all_surveys = group(data id_within_survey)
	sum id_all_surveys, d 
	// Full variable name, so the label does not depend on variable abbreviation being enabled
	label var id_all_surveys "ID, all surveys"

*------------------------------------------------------------------------------*
*------------------------------------------------------------------------------*

******************
* INDICATORS *
******************

	// Each indicator is 1 when the respondent's survey is in the list, 0 otherwise.
	// Lists are split across several inlist() calls because inlist() accepts a limited number of string arguments.

	generate incdata = inlist(data, "anes", "avtmh57", "avtmh76", "nsfh", "gss", "nfs", "nlsom", "nlsmw") ///
		| inlist(data, "nsba", "ocg62", "ocg73", "nlsy79", "nlsyw68", "nlsym66", "psid1997", "psid2017")
	label variable incdata "Data can be used for inc-occ analysis"

	generate edudata = inlist(data, "ocg62", "ocg73", "nsba", "nlsmw", "nlsom", "gss", "nsfh") ///
		| inlist(data, "avtmh76", "nlsy79", "nlsyw68", "psid1997", "psid2017", "nlsym66")
	label variable edudata "Data can be used for edu-edu analysis"

	generate occdata = inlist(data, "anes", "avtmh76", "nsfh", "gss", "nlsom", "nlsmw", "nsba", "ocg62") ///
		| inlist(data, "ocg73", "nlsy79", "nlsyw68", "nlsym66", "nfs", "psid1997", "psid2017")
	label variable occdata "Data can be used for occ-occ analysis"

	// "Nationally representative" = not male-only, female-only, black-only, or white-only
	generate natrepdata = inlist(data, "anes", "gss", "nsfh", "avtmh57", "avtmh76", "nlsy79", "psid1997", "psid2017")
	label variable natrepdata "Data is nationally representative"

	generate natrep_lesssex_data = inlist(data, "ocg62", "ocg73", "nfs", "nlsmw", "nlsom", "nlsyw68", "nlsym66")
	label variable natrep_lesssex_data  "Data is nationally representative on all dimensions except sex"
	
*************************
* GEOGRAPHIC INDICATORS 
*************************
*-----------------------------------------*
* Region born or region grew up available
*-----------------------------------------*
	// Survey-level flags: 1 when the region variable is non-missing for at least one respondent of that survey
	generate regbirth_data = .
	generate regchildhood_data = .

		foreach src of global datasets {
			display "`src'"

			// Number of respondents in this survey
			count if data == "`src'"
			local data_n = r(N)

			// Of those, how many lack region of birth
			count if data == "`src'" & region4_born == .
			local regborn_n = r(N)

			// Of those, how many lack region of childhood
			count if data == "`src'" & region4_childhood == .
			local regchild_n = r(N)

			// Available unless the variable is missing for the entire survey
			replace regbirth_data = (`regborn_n' < `data_n') if data == "`src'"
			replace regchildhood_data = (`regchild_n' < `data_n') if data == "`src'"
		}

	// Surveys with neither region variable
	tabulate data if regbirth_data == 0 & regchildhood_data == 0, missing

	* Create 1 harmonized variable
	/* Region of childhood takes priority; region of birth is used only
	   when childhood region is missing. */
	generate region_merge = cond(region4_childhood != ., region4_childhood, ///
		cond(region4_born != ., region4_born, .))
	tabulate data region_merge, missing

	label variable regbirth_data "Datasets with region of birth available"
	label variable regchildhood_data "Datasets with region of childhood available"
	label variable region_merge "Region of birth or childhood, combined variable"
		
*-----------------------------------------*
* Born or grew up in the south
*-----------------------------------------*
	// Start from the four-category region: code 3 is treated as the South
	generate south_merge = (region_merge == 3)
	// 3 surveys only have south/non-south info: NFS, NLSOM, NLSMW
	replace south_merge = 1 if bornsouth == 1 & region4_born == . & region4_childhood == .
	// The NLS youth cohorts supply a grew-up-in-South flag
	replace south_merge = 1 if grewup_south == 1 & inlist(data, "nlsy79", "nlsym66", "nlsyw68")
	// No geographic information at all: leave missing rather than 0
	replace south_merge = . if region_merge == . & grewup_south == . & bornsouth == .
	tabulate south_merge if incdata == 1, missing
	label variable south_merge "Respondent grew up or born in South" 

*-----------------------------------------*
* Currently resides in the South
*-----------------------------------------*
	// 1 when either the four-category region or the south flag says South
	generate south_merge_son = (region4 == 3 | south_residence == 1)
	// Missing when neither residence variable is observed
	replace south_merge_son = . if region4 == . & south_residence == .
	tabulate south_merge_son, missing
	tabulate data south_merge_son, missing

	label variable south_merge_son "R currently resides in the South: used to merge son incscore by occ x race x south"
		
*------------------------------------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------------------------------------*

*****************************************************
/* MERGE IN CENSUS INCOME SCORES AT VARIOUS LEVELS
   FOR ADULT CHILD SURVEY RESPONDENTS */
*****************************************************

* 1. Occupation x race x (current) southern residence
	/* Attach income scores keyed on the respondent's own occupation (occRej),
	   race, and current southern residence. */
	sort occRej race south_merge_son
	merge m:1 occRej race south_merge_son using "./1_DataSources/CensusData/output/IncomeScores_Coarsened_byrace_bysouth_R.dta"
	// Any respondent left unmatched must be missing at least one merge key
	assert occRej==. | race==. | south_merge_son==. if _merge==1 
	// Discard score-file cells that no respondent falls into
	drop if _merge==2
	drop _merge	
		
* 2. Occupation-only
	merge m:1 occRej using "./1_DataSources/CensusData/output/IncomeScores_Coarsened_all_R.dta"
	assert occRej==. if _merge==1 
	drop if _merge==2
	drop _merge	

*----------------------------------*
* SUPPLEMENTAL 1936 INCOME SCORES
*----------------------------------*
	merge m:1 occRej race south_merge_son using "./1_DataSources/ConsumptionSurvey_1936/output/ConsumptionSurvey_1936_IncomeScores_R.dta"
	assert (occRej==.) | race==. | south_merge_son==. if _merge==1 
	drop if _merge==2
	drop _merge		

	* Blend 1936 & 1940 
	/* Occupation codes 81 and 21 (the farm and self-employed codes, per the
	   labels on the father measures later in this file) take the 1936 score
	   in place of the 1940 one. The scores here belong to the respondent and
	   were merged on occRej, so the switch is keyed on the respondent's own
	   occupation. The earlier version tested fatheroccej, the father's
	   occupation, which appears to be a copy of the father-side blend.
	   NOTE(review): this changes the 1940 respondent score for affected
	   rows; confirm against the intended definition. */
	gen avg_HHinc_1940_byocc_byr_bys_R = avg_HHinc_1940_byrace_bysouth 
	replace avg_HHinc_1940_byocc_byr_bys_R= avg_totfaminc_1936 if occRej==81 | occRej==21

*-------------------------------------------*
*-------------------------------------------*
	
*-------------------*
* LOG INCOME
*-------------------*

	// Natural log of the respondent's real family income
	generate log_son_baseline = ln(fam_inc_real)
	label variable log_son_baseline "Logged R income, baseline measure"

	// Natural log of the 1950 occupation score
	generate log_son_occscore = ln(avg_occscore_1950_byocc_R)
	label variable log_son_occscore "Logged R IPUMS occscore, 1950"

	// 1940 score (already blended with 1936)
	generate log_income_byrace_bysouth_1940_R = ln(avg_HHinc_1940_byocc_byr_bys_R)

	// Each decennial score from 1960 to 2010, plus 2019
	foreach cyr of numlist 1960(10)2010 2019 {
		generate log_income_byrace_bysouth_`cyr'_R = ln(avg_HHinc_`cyr'_byocc_byr_bys_R)
	}

	* Equivalized measure of income
		// Family income divided by the square root of R_hhsize_plusR
		generate son_baseline_equivalized = fam_inc_real / sqrt(R_hhsize_plusR)
		label variable son_baseline_equivalized "Son's family income normalized by HH size (divided by sq. root of number)"

		// Log of the equivalized measure
		generate log_son_baseline_equivalized = ln(son_baseline_equivalized)
		label variable log_son_baseline_equivalized "Logged R income, baseline divided by HH size"
		
	
*-----------------------------*
* INTERPOLATED INCOME
*-----------------------------*

	// Respondents with all three merge keys observed; each must end up with a score (asserted at the end)
	generate R_sample = (occRej != . & race != . & south_merge_son != .)
	generate age40 = dob + 40 // predicted income is evaluated around age 40

	generate log_son_interpolated = .

	/* age40 from 1940 to 1960: linear blend of the 1940 and 1960 scores,
	   weighted by distance in years from each endpoint */
	foreach decade1 in 1940 {
		local decade2 = `decade1' + 20
			forvalues yr = `decade1'/`decade2' {
				local j = `yr' - `decade1' // years past the first endpoint
				local k = 20 - `j' // years short of the second endpoint

				replace log_son_interpolated = ((`k'/20)*log_income_byrace_bysouth_`decade1'_R) + ((`j'/20)*log_income_byrace_bysouth_`decade2'_R) if age40 == `yr'
			}
	}

	/* age40 from 1960 to 2010: linear blend of the two Censuses that
	   bracket the year the respondent turned 40 */
	foreach decade1 of numlist 1960(10)2000 {
		local decade2 = `decade1' + 10
			forvalues yr = `decade1'/`decade2' {
				local j = `yr' - `decade1' // years past the first endpoint
				local k = 10 - `j' // years short of the second endpoint

				replace log_son_interpolated = ((`k'/10)*log_income_byrace_bysouth_`decade1'_R) + ((`j'/10)*log_income_byrace_bysouth_`decade2'_R) if age40 == `yr'
			}
	}

	/* age40 from 2010 to 2019: linear blend of the 2010 and 2019 scores
	   (a 9-year span) */
	foreach decade1 in 2010 {
		local decade2 = `decade1' + 9

			forvalues yr = `decade1'/`decade2' {
				local j = `yr' - `decade1' // years past the first endpoint
				local k = 9 - `j' // years short of the second endpoint

				replace log_son_interpolated = ((`k'/9)*log_income_byrace_bysouth_`decade1'_R) + ((`j'/9)*log_income_byrace_bysouth_`decade2'_R) if age40 == `yr'
			}
	}

	/* SPECIAL CASE: respondents with occupation==65 have a missing income
	   score in 1990. Anyone still missing after the loops above, with age40
	   from 1980 to 2000, gets a blend of 1980 and 2000 instead. */
	local decade1 = 1980
	local decade2 = 2000

	forvalues yr = `decade1'/`decade2' {
		local j = `yr' - `decade1' // years past the first endpoint
		local k = 20 - `j' // years short of the second endpoint

		replace log_son_interpolated = ((`k'/20)*log_income_byrace_bysouth_`decade1'_R) + ((`j'/20)*log_income_byrace_bysouth_`decade2'_R) if age40 == `yr' & log_son_interpolated == .
	}

	// Every respondent with complete merge keys must now have a value
	assert log_son_interpolated != . if R_sample == 1
	// Remove helpers and the respondent-side score inputs
	drop R_sample age40 *byocc_byr_bys_R *1936*
	
*---------------------------------------------------------------------------------------------------------------------*
*---------------------------------------------------------------------------------------------------------------------*
	
************************************************
/* MERGE IN CENSUS INCOME SCORES  
   AT VARIOUS LEVELS FOR FATHERS  */
************************************************

	/* Important preliminary step: give all survey respondents 
	                               with father_notworking =1 an 
	                               occupation code of "99". Allows 
	                               Census income scores for 
	                               non-working fathers to be merged. */
	// Non-working fathers must not already carry an occupation code
	assert fatheroccej==. if father_notworking==1 
	// Give them code 99 so they can be matched in the merges below
	replace fatheroccej =99 if father_notworking==1
	tab father_notworking fatheroccej if father_notworking==1, m 
	tab fatheroccej, m

	/* Pattern shared by every merge below:
	     - merge m:1 on the listed keys (many respondents per score cell)
	     - assert that any unmatched respondent is missing a merge key (or,
	       where the assertion allows it, has father occupation 99)
	     - drop score cells that matched no respondent, then drop _merge
	   NOTE(review): the numbering in the headings skips 6. */

* 1. Occupation x race x south 
	sort fatheroccej race south_merge
	merge m:1 fatheroccej race south_merge using "./1_DataSources/CensusData/output/IncomeScores_Coarsened_byrace_bysouth.dta"
	// Unlike the later merges, code 99 is not exempted here, so any father with code 99 and known race and south must find a match
	assert fatheroccej==. | race==. | south_merge==. if _merge==1 
	drop if _merge==2
	drop _merge
	
* 2. Occupation x race x south x education
	sort fatheroccej race south_merge edu 
	merge m:1 fatheroccej race south_merge edu using "./1_DataSources/CensusData/output/IncomeScores_Coarsened_1940_byrace_bysouth_byedu.dta"
	assert (fatheroccej==.| fatheroccej==99) | race==. | edu==. | south_merge==. if _merge==1 
	drop if _merge==2 
	drop _merge
	
* 3. Occupation-only
	merge m:1 fatheroccej using "./1_DataSources/CensusData/output/IncomeScores_Coarsened_all.dta"
	assert (fatheroccej==. | fatheroccej==99) if _merge==1 
	drop if _merge==2
	drop _merge
	
* 4. Occupation x race
	merge m:1 fatheroccej race using "./1_DataSources/CensusData/output/IncomeScores_Coarsened_byrace.dta"
	assert (fatheroccej==.| fatheroccej==99) | race==. if _merge==1 
	drop if _merge==2
	drop _merge
	
* 5. Occupation x race x 4 regions
	merge m:1 fatheroccej race region_merge using "./1_DataSources/CensusData/output/IncomeScores_Coarsened_byrace_byregion.dta"
	assert (fatheroccej==. | fatheroccej==99) | race==. | region_merge==. if _merge==1 
	drop if _merge==2
	drop _merge

* 7. Race-only 
	merge m:1 race using "./1_DataSources/CensusData/output/incomescores_fathers_1940_to1990_byrace.dta"
	assert race==.  if _merge==1 
	drop if _merge==2
	drop _merge

* 8. South-only 
	merge m:1 south_merge using "./1_DataSources/CensusData/output/incomescores_fathers_1940_to1990_bysouth.dta"
	assert south_merge==. if _merge==1 
	drop if _merge==2
	drop _merge

* 9. Occupation x south 
	merge m:1 fatheroccej south_merge using "./1_DataSources/CensusData/output/incomescores_fathers_1940_to1990_byocc_bys.dta"
	assert (fatheroccej==. | fatheroccej==99) | south_merge==. if _merge==1 
	drop if _merge==2
	drop _merge

* 10. Race x south 
	merge m:1 race south_merge using "./1_DataSources/CensusData/output/incomescores_fathers_1940_to1990_byr_bys.dta"
	assert race==. | south_merge==. if _merge==1 
	drop if _merge==2
	drop _merge

	// Remove every variable whose name starts with number_ (presumably cell counts brought in by the score files; not created in the visible part of this file)
	drop number_*

*---------------------------------------------------------------------------------------------------------------------*
*---------------------------------------------------------------------------------------------------------------------*
	
**************************
*** LOG FATHER INCOME 
**************************	

* 1. Occupation x race x south 

	// ---- Household-level income ----
		// 1940, without and with the CW farm fix
		generate log_father_1940_byr_bys_nofix = ln(avg_HHinc_1940_byrace_bysouth)
		label variable log_father_1940_byr_bys_nofix "Logged father's household income, 1940 by race by south, no fixes"

		generate log_father_1940_byr_bys_CWfix = ln(avg_HHinc_byr_bys_CWfix)
		label variable log_father_1940_byr_bys_CWfix "Logged father's household income, 1940 by race by south, CW farm fix"

		// 1950-1990. There is no household-level income in 1950, so the
		// 1950 personal-income score is cloned under the household name
		// to let the loop treat every decade alike.
		clonevar avg_HHinc_1950_byocc_byr_bys = avg_inctot_1950_byocc_byr_bys
		label variable avg_HHinc_1950_byocc_byr_bys "clone of avg_inctot_1950_byocc_byr_bys"

		forvalues cyr = 1950(10)1990 {
			generate log_father_`cyr'_byr_bys = ln(avg_HHinc_`cyr'_byocc_byr_bys)
			label variable log_father_`cyr'_byr_bys "Logged father's household income, `cyr', by race by south"
		}

			// Alternative household-level weight, 1960-1990
				forvalues cyr = 1960(10)1990 {
					generate log_father_`cyr'_byors_altwgt = ln(avg_HHinc_`cyr'_byors_altwgt)
					label variable log_father_`cyr'_byors_altwgt "Logged father's household income, `cyr', by race by south, alt weight"
				}

	// ---- Personal income ----
		// 1940, without and with the CW farm fix
		generate log_father_incwage_nofix = ln(avgincwage_1940_byrace_bysouth)
		label variable log_father_incwage_nofix "Logged father's personal income, 1940 by race by south, no fixes"

		generate log_father_incwage_CWfix = ln(avg_incwage_byr_bys_CWfix)
		label variable log_father_incwage_CWfix "Logged father's income, 1940 incwage by race by south, CW farm fix"

		// 1960-1990
		forvalues cyr = 1960(10)1990 {
			generate log_father_inctot`cyr'_byr_bys = ln(avg_inctot_`cyr'_byocc_byr_bys)
			label variable log_father_inctot`cyr'_byr_bys "Logged father's personal income (inctot), `cyr', by race by south"
		}

* 2. Occupation-only

	// ---- Household-level income ----
		// 1940, with and without the CW fix
		generate log_father_1940occ_CWfix = ln(avg_HHinc_1940_CWfix)
		label variable log_father_1940occ_CWfix "Logged father's 1940 HH income score (just occ), CW fix"

		generate log_father_1940occ_nofix = ln(avg_HHinc_1940_nofix)
		label variable log_father_1940occ_nofix "Logged father's 1940 HH income score (just occ), no fix"

		// 1950 occupation score
		generate log_father_1950occscore = ln(avg_occscore_1950_byocc)
		label variable log_father_1950occscore "Logged father's IPUMS occscore, 1950"

		// 1960-1990
		forvalues cyr = 1960(10)1990 {
			generate log_father_`cyr'occ = ln(avg_HHinc_`cyr'_byocc)
			label variable log_father_`cyr'occ "Logged father's `cyr' income score (just occ)"
		}

	// ---- Individual-level income ----
		// 1940, without and with the CW farm fix
		generate log_father_incwage_nofix_occ = ln(avg_incwage_1940_nofix)
		label variable log_father_incwage_nofix_occ "Logged father's personal income, 1940 occ only, no fixes"

		generate log_father_incwage_CWfix_occ = ln(avg_incwage_1940_CWfix)
		label variable log_father_incwage_CWfix_occ "Logged father's income, 1940 occ only, CW farm fix"

		// 1960-1990
		forvalues cyr = 1960(10)1990 {
			generate log_father_inctot`cyr'_occ = ln(avg_inctot_`cyr'_byocc)
			label variable log_father_inctot`cyr'_occ "Logged father's personal income (inctot), `cyr', occ only"
		}

* 3. Occupation x race

	// ---- Household-level income ----
		// 1940, with and without the CW fix
		generate log_father_1940_byrace_CWfix = ln(avg_HHinc_byrace_CWfix)
		label variable log_father_1940_byrace_CWfix "Logged father's 1940 income score by race, CW fix"

		generate log_father_1940_byrace_nofix = ln(avg_HHinc_1940_byrace)
		label variable log_father_1940_byrace_nofix "Logged father's 1940 income score by race, no fix"

		// 1960-1990
		forvalues cyr = 1960(10)1990 {
			generate log_father_`cyr'_byrace = ln(avg_HHinc_`cyr'_byocc_byr)
			label variable log_father_`cyr'_byrace "Logged father's `cyr' income score by race"
		}

* 4. Occupation x race x south X edu

	// ---- Household-level income ----
		// 1940, CW farm fix
		generate log_father_1940_byors_edu = ln(avg_HHinc_byr_bys_edu_CWfix)
		label variable log_father_1940_byors_edu "Logged father's household income, 1940 by race, south, and edu, CW farm fix"

		// 1960-1990 (no 1950 Census income score at this level)
		forvalues cyr = 1960(10)1990 {
			generate log_father_`cyr'_byors_edu = ln(avg_HHinc_`cyr'_byors_byedu)
			label variable log_father_`cyr'_byors_edu "Logged father's `cyr' income score by race, south, and edu"
		}
	
* 5. Occupation x race x region

	// ---- Household-level income ----
		// 1940, CW farm fix
		generate log_father_1940_byreg = ln(avg_HHinc_byr_byreg_CWfix)
		label variable log_father_1940_byreg "Logged father's household income, 1940 by race by region, CW farm fix"

		// 1960-1990 (no 1950 Census income score at this level)
		foreach cyr of numlist 1960(10)1990 {
			generate log_father_`cyr'_byreg = ln(avg_HHinc_`cyr'_byocc_byr_byreg)
			label variable log_father_`cyr'_byreg "Logged father's household income, `cyr', by race by region"
		}

* 6. Race-only

	// ---- Household-level income ----
		// 1940
		generate log_father_1940_byr_only = ln(hh_income_byrace)
		label variable log_father_1940_byr_only "Logged father's household income, 1940, by race only"

		// 1960-1990 (no 1950 Census income score at this level)
		foreach cyr of numlist 1960(10)1990 {
			generate log_father_`cyr'_byr_only = ln(avg_HHinc_`cyr'_byrace)
			label variable log_father_`cyr'_byr_only "Logged father's household income, `cyr', by race only"
		}

* 7. South-only

	// ---- Household-level income ----
		// 1940
		generate log_father_1940_bysouth = ln(hh_income_bysouth)
		label variable log_father_1940_bysouth "Logged father's household income, 1940, by south only"

		// 1960-1990 (no 1950 Census income score at this level)
		foreach cyr of numlist 1960(10)1990 {
			generate log_father_`cyr'_bysouth = ln(avg_HHinc_`cyr'_bysouth)
			label variable log_father_`cyr'_bysouth "Logged father's household income, `cyr', by south only"
		}

* 8. Occupation x south

	// ---- Household-level income ----
		// 1940
		generate log_father_1940_byocc_bys = ln(hh_income_byocc_bys)
		label variable log_father_1940_byocc_bys "Logged father's household income, 1940, by occ x south"

		// 1960-1990 (no 1950 Census income score at this level)
		foreach cyr of numlist 1960(10)1990 {
			generate log_father_`cyr'_byocc_bys = ln(avg_HHinc_`cyr'_byocc_bys)
			label variable log_father_`cyr'_byocc_bys "Logged father's household income, `cyr', by occ x south"
		}

* 9. Race x south 

	// ---- Household-level income ----
		// 1940
		generate log_father_1940_byr_bys_only = ln(hh_income_byr_bys)
		label variable log_father_1940_byr_bys_only "Logged father's household income, 1940, by race x south only "

		// 1960-1990 (no 1950 Census income score at this level)
		foreach cyr of numlist 1960(10)1990 {
			generate log_father_`cyr'_byr_bys_only = ln(avg_HHinc_`cyr'_byr_bys)
			label variable log_father_`cyr'_byr_bys_only "Logged father's household income, `cyr', by race x south only"
		}

*---------------------------------------------------------------------------------------------------------------------*
*---------------------------------------------------------------------------------------------------------------------*
	
*******************************************
* MERGE IN 1936 INCOME SCORES AND LOG
*******************************************

	// Attach the 1936 scores by father occupation x race x south
	merge m:1 fatheroccej race south_merge using "./1_DataSources/ConsumptionSurvey_1936/output/ConsumptionSurvey_1936_IncomeScores.dta"
	// Unmatched respondents must lack a merge key or have father occupation 99
	assert (fatheroccej == . | fatheroccej == 99) | race == . | south_merge == . if _merge == 1
	drop if _merge == 2
	drop _merge

	generate log_father_totfaminc_1936 = ln(avg_totfaminc_1936)
	label variable log_father_totfaminc_1936 "Logged household income, 1936 by race by South"

************************************************
/* CONSTRUCT INCOME SCORES AT VARIOUS LEVELS 
   THAT MIX 1940 CENSUS AND 1936 INCOME  */
************************************************
	/* In every measure below, fathers with occupation code 81 or 21 take
	   the 1936 score and all other fathers take the 1940 score. */

* 1. Occupation x race x south
	generate father_HHinc_1936fix = cond(inlist(fatheroccej, 81, 21), avg_totfaminc_1936, avg_HHinc_1940_byrace_bysouth)
	label variable father_HHinc_1936fix "Father baseline income score, 1936 farm and self-emp"

	generate log_father_HHinc_1936fix = ln(father_HHinc_1936fix)
	label variable log_father_HHinc_1936fix "Logged father's baseline HH income, 1936 farm and self-emp. fix"

* 2.  Occupation x race x south (alternative weight: number of children in household)
	generate father_HHinc_1936fix_altwt = cond(inlist(fatheroccej, 81, 21), avg_totfaminc_1936_altwt, avg_HHinc_1940_byr_bys_altwgt)
	label variable father_HHinc_1936fix_altwt "Father baseline income score, 1936 farm and self-emp, alt. weight"

	generate log_father_1940_byors_altwgt = ln(father_HHinc_1936fix_altwt)
	label variable log_father_1940_byors_altwgt "Logged father's HH income, 1936 fix, using children as weight in first stage"

* 3. Occupation-only (household-level income)
	generate father_HHinc_1936fix_byocc = cond(inlist(fatheroccej, 81, 21), avg_totfaminc_1936_byocc, avg_HHinc_1940_nofix)
	label variable father_HHinc_1936fix_byocc "Father income score, by occ, 1936 farm and self-emp"

	generate log_father_HHinc_1936fix_byocc = ln(father_HHinc_1936fix_byocc)
	label variable log_father_HHinc_1936fix_byocc "Logged father's HH income, by occ, 1936 farm and self-emp. fix"

* 4. Occupation-only (individual-level income)
	// The 1936 replacement used here is the family-income score avg_totfaminc_1936_byocc
	generate father_owninc_1936fix_byocc = cond(inlist(fatheroccej, 81, 21), avg_totfaminc_1936_byocc, avg_incwage_1940_nofix)
	label variable father_owninc_1936fix_byocc "Father personal income score, by occ, 1936 farm and self-emp"

	generate log_father_owninc_1936fix_byocc = ln(father_owninc_1936fix_byocc)
	label variable log_father_owninc_1936fix_byocc "Logged father's personal income, by occ, 1936 farm and self-emp. fix"

* 5. Occupation x race
	generate father_HHinc_1936fix_byo_byr = cond(inlist(fatheroccej, 81, 21), avg_totfaminc_1936_byocc_byr, avg_HHinc_1940_byrace)
	label variable father_HHinc_1936fix_byo_byr "Father income score, by occ x race, 1936 farm and self-emp"

	generate log_father_HHinc_1936fix_byo_byr = ln(father_HHinc_1936fix_byo_byr)
	label variable log_father_HHinc_1936fix_byo_byr "Logged father's HH income, occ x race, 1936 farm and self-emp. fix"

* 6. Occupation x south
	generate father_HHinc_1936fix_byo_bys = cond(inlist(fatheroccej, 81, 21), avg_totfaminc_1936_byocc_bys, hh_income_byocc_bys)
	label variable father_HHinc_1936fix_byo_bys "Father baseline income score, 1936 farm and self-emp, occ x south"

	generate log_father_HHinc_1936fix_byo_bys = ln(father_HHinc_1936fix_byo_bys)
	label variable log_father_HHinc_1936fix_byo_bys "Logged father's baseline HH income, 1936 farm and self-emp. fix, occ x south"

	/*Note: The 1940 Census/1936 mixed measures are created
	        to adjust for farmer and self-employed income.
	        No need to create a 1940/1936 mixed income measure at
	        the race-only, south-only, or race x south level, as 
	        they do not vary at the occupation level. */

*---------------------------------------------------------------------------------------------------------------------*
*---------------------------------------------------------------------------------------------------------------------*

*******************************************
* MERGE IN 1900 INCOME SCORES
*******************************************

	// Merge key for the 1900 files: father occupation, with code 21 folded into 28
	gen occ1950ej_PH = fatheroccej
	replace occ1950ej_PH = 28 if fatheroccej==21 //no self employment distinction in 1900 data
	
	// 1900 scores by occupation x race x south
	merge m:1 occ1950ej_PH race south_merge using "./1_DataSources/1900_IncomeScores/output/IncomeScores_1900_byrace_bysouth.dta"
	// NOTE(review): unlike the two merges below, father occupation 99 is not exempted in this assertion; confirm that is intended
	assert fatheroccej==. | race==. | south_merge==. if _merge==1
	drop if _merge==2
	drop _merge   
	
	// 1900 scores by occupation x race x region
	merge m:1 occ1950ej_PH race region_merge using "./1_DataSources/1900_IncomeScores/output/IncomeScores_1900_byrace_byregion.dta"
	assert (fatheroccej==. | fatheroccej==99) | race==. | region_merge==. if _merge==1
	drop if _merge==2
	drop _merge   
	
	// 1900 scores by occupation x race x south x education
	merge m:1 occ1950ej_PH race south_merge edu using "./1_DataSources/1900_IncomeScores/output/IncomeScores_1900_byrace_bysouth_byedu.dta"
	assert (fatheroccej==. | fatheroccej==99) | race==. | south_merge==. | edu==. if _merge==1
	drop if _merge==2
	drop _merge 
	
*************************************************
* CONSTRUCT INCOME SCORES THAT MIX 1900 SOURCES
*************************************************

	/* For each aggregation level x: start from income_PH_farmfix_x, replace
	   it with netearn00_adj_x for fathers with occupation code 81, then take
	   the natural log. */
	foreach x in byocc byrace byr_bys byr_byreg byr_bys_edu byr_bys_edu_v2 bysouth {
		gen father_inc_1900_`x' = income_PH_farmfix_`x'
		replace father_inc_1900_`x' = netearn00_adj_`x' if fatheroccej==81
		label var father_inc_1900_`x' "Father income score, 1900 Census of Ag and Preston Haines"
		
		gen log_father_1900_`x' = ln(father_inc_1900_`x')
		label var log_father_1900_`x' "Logged father's income, 1900 Census of Ag and Preston Haines"	
	}

	//Log race-only, south-only and race x south only measures. No fix for farmers.
	foreach v in just_race just_south just_race_south {
		gen log_father_1900_`v' = ln(income_PH_`v')
		label var log_father_1900_`v' "Logged father's income, no farmfix, PH (1900)"
	}
	
	// Remove the 1900 inputs and the temporary merge key now that the logged measures exist
	drop income_PH_farmfix* netearn00* occ1950ej_PH
	
	/* Consistency checks: wherever the later-period measure at the same
	   aggregation level is missing, the 1900 measure must be missing too. */
	assert log_father_1900_byr_bys==. if log_father_HHinc_1936fix==.
	assert log_father_1900_byocc==. if log_father_HHinc_1936fix_byocc==.
	assert log_father_1900_byrace==. if log_father_HHinc_1936fix_byo_byr==.
	assert log_father_1900_byr_byreg==. if log_father_1940_byreg==.
	assert log_father_1900_byr_bys_edu==. if log_father_1940_byors_edu==.
	assert log_father_1900_byr_bys_edu_v2==. if log_father_1940_byors_edu==.
	assert log_father_1900_bysouth==. if log_father_HHinc_1936fix_byo_bys==.

	/*Note: The 1900 mixed measures are created to adjust for farmer 
	        income. No need to create a 1900 mixed inc measure for
	        the race, south, or race x south level, since they 
	        do not vary at the occupation level. */

*---------------------------------------------------------------------------------------------------------------------*
*---------------------------------------------------------------------------------------------------------------------*
	
**************************************************
* BLENDED (INTERPOLATED) WORKING FATHER INCOME SCORES
**************************************************
/* General approach: Assign father predicted
                     income in the year that
							the adult child respondent 
							turned 10.*/
	// dob + 10; per the section header, the year the respondent turned 10
	gen age10 = dob+10

* 1. BASELINE: Occupation x race x south (household-level income)

	/* Two versions are built with the same loops and differ only in which
	   1940 measure is used:
	     interpolated       -> log_father_HHinc_1936fix
	     interpolated_CWfix -> log_father_1940_byr_bys_CWfix */
	foreach z in interpolated interpolated_CWfix {

		if "`z'"=="interpolated" local measure "log_father_HHinc_1936fix"
		if "`z'"=="interpolated_CWfix" local measure "log_father_1940_byr_bys_CWfix"

		// Temporary copy under the generic 1940 name so the loops below can refer to every decade the same way; dropped at the end of each pass
		clonevar log_father_1940_byr_bys = `measure' 

		gen log_father_`z' =.
		
		/* 1910-1930 cohorts: Give them a weighted average 
		                      of 1900 and 1940 income scores */
		forval i=1920(1)1940 {
			local j = `i'-1900 //# years away from 1900
			local k = 40-`j' //#  years away from 1940
			
			replace log_father_`z' = ((`k'/40)*log_father_1900_byr_bys) + ((`j'/40)*log_father_1940_byr_bys) if age10==`i'
		}
		
		/* 1931-1950 cohorts: Give them a weighted average 
		                      of 1940 and 1960 income scores */
		forval i=1941(1)1960 {		
			local j=`i'-1940 //# years away from first decade
			local k=20-`j' //# years away from second decade
			
			replace log_father_`z' = ((`k'/20)*log_father_1940_byr_bys) + ((`j'/20)*log_father_1960_byr_bys) if age10==`i'
		}	

    /* 1951-1979 birth cohorts: Give them a weighted average
                                of two Censuses closest to when 
                                the survey respondent turned 10. */
		// decade1 is the tens digit of the earlier Census year (6, 7, 8 stand for 1960, 1970, 1980)
		// NOTE(review): the loops below cover age10 from 1961 through 1990, one year beyond the cohort range named in the comment above
		foreach decade1 in 6 7 8 {
			local decade2 = `decade1'+1
			
				forval i=19`decade1'1(1)19`decade2'0 {
					local j=`i'-19`decade1'0 //# years away from first decade
					local k=10-`j' //# years away from second decade
					
					replace log_father_`z' = ((`k'/10)*log_father_19`decade1'0_byr_bys) + ((`j'/10)*log_father_19`decade2'0_byr_bys) if age10==`i'
				}
		}

		// Anyone with a non-missing 1940 measure must have received an interpolated value
		assert log_father_`z'!=. if `measure'!=.
		label var log_father_`z' "Logged father's income, interpolated for each decade"
		drop log_father_1940_byr_bys
	}
		
		// From age10 of 1960 onward the two versions must be identical
		assert log_father_interpolated == log_father_interpolated_CWfix if age10>=1960 //Thes measures differ in early cohorts because of CW fixes.
		// Every respondent with known race, a father occupation code below 99, and known south status must have a value
		assert log_father_interpolated!=. if race!=. & fatheroccej<99 & south_merge!=.

*-----------------------------------*
*-----------------------------------*

* 2. Occupation x race x south (individual-level income)
*    Same blending scheme as the baseline, using the inctot/incwage scores.

	// Temporary copy of the 1940 anchor; dropped once the blend is built
	clonevar log_father_inctot1940_byr_bys = log_father_incwage_CWfix

	generate log_father_personalinc_interp = .

	/* age10 in 1920-1940: linear blend of the 1900 and 1940 scores */
	forvalues yr = 1920/1940 {
		local past  = `yr' - 1900   // years since the 1900 anchor
		local ahead = 40 - `past'   // years until the 1940 anchor

		replace log_father_personalinc_interp = ((`ahead'/40)*log_father_1900_byr_bys) ///
			+ ((`past'/40)*log_father_inctot1940_byr_bys) if age10==`yr'
	}

	/* age10 in 1941-1960: linear blend of the 1940 and 1960 scores */
	forvalues yr = 1941/1960 {
		local past  = `yr' - 1940   // years since the 1940 anchor
		local ahead = 20 - `past'   // years until the 1960 anchor

		replace log_father_personalinc_interp = ((`ahead'/20)*log_father_inctot1940_byr_bys) ///
			+ ((`past'/20)*log_father_inctot1960_byr_bys) if age10==`yr'
	}

	/* age10 in 1961-1990: linear blend of the two decennial scores
	   that bracket the year the respondent turned 10 */
	forvalues lo = 1960(10)1980 {
		local hi    = `lo' + 10
		local first = `lo' + 1

		forvalues yr = `first'/`hi' {
			local past  = `yr' - `lo'   // years since the earlier Census
			local ahead = 10 - `past'   // years until the later Census

			replace log_father_personalinc_interp = ((`ahead'/10)*log_father_inctot`lo'_byr_bys) ///
				+ ((`past'/10)*log_father_inctot`hi'_byr_bys) if age10==`yr'
		}
	}

	// Every observation with a 1940 personal-income score must have a blended value
	assert log_father_personalinc_interp!=. if log_father_incwage_CWfix!=.
	label variable log_father_personalinc_interp "Logged father's personal income (occ, race, south), interpolated for each decade"
	drop log_father_inctot1940_byr_bys

*-----------------------------------*
*-----------------------------------*
	
* 3. Occupation x race x south---use closest Census to year that survey respondent turned 10 
/* No interpolation here: each birth decade is assigned the score from one
   Census. The inputs are the same scores used for the household-level
   baseline in section 1 (log_father_HHinc_1936fix and
   log_father_<year>_byr_bys), so the labels below no longer say "personal
   income", and both labels stay within Stata's 80-character limit. */
	
	//a: 1910-1930 -> 1940 (1936fix) score; 1940-1950 -> 1960; 1960 -> 1970; 1970 -> 1980
	gen log_father_closest_census = .
	replace log_father_closest_census = log_father_HHinc_1936fix if decade==1910 | decade==1920 | decade==1930 
	replace log_father_closest_census = log_father_1960_byr_bys if decade==1940 | decade==1950 
	replace log_father_closest_census = log_father_1970_byr_bys if decade==1960 
	replace log_father_closest_census = log_father_1980_byr_bys if decade==1970 
	
	// Anyone with a baseline interpolated score must also have this one
	assert log_father_closest_census!=. if log_father_interpolated!=.
	label var log_father_closest_census "Logged father's income (occ, race, south), using closest Census cohort"
	
	//b: identical to (a) except that the 1940 birth decade uses the 1950 score
	gen log_father_closest_census_v2 = .
	replace log_father_closest_census_v2 = log_father_HHinc_1936fix if decade==1910 | decade==1920 | decade==1930 
	replace log_father_closest_census_v2 = log_father_1950_byr_bys if decade==1940 //only difference
	replace log_father_closest_census_v2 = log_father_1960_byr_bys if decade==1950 
	replace log_father_closest_census_v2 = log_father_1970_byr_bys if decade==1960 
	replace log_father_closest_census_v2 = log_father_1980_byr_bys if decade==1970 
	
	// Anyone with a baseline interpolated score must also have this one
	assert log_father_closest_census_v2!=. if log_father_interpolated!=.
	label var log_father_closest_census_v2 "Logged father's income (occ, race, south), using closest Census cohort and 1950"

*-----------------------------------*
*-----------------------------------*

* 4. Occupation x race x south (alternate weight: number of children fathers had)
*    Same blending scheme as the baseline, using the *_byors_altwgt scores
*    from 1940 onward.

	generate log_father_interp_altwgt = .

	/* age10 in 1920-1940: linear blend of the 1900 and 1940 scores */
	forvalues yr = 1920/1940 {
		local past  = `yr' - 1900   // years since the 1900 anchor
		local ahead = 40 - `past'   // years until the 1940 anchor

		replace log_father_interp_altwgt = ((`ahead'/40)*log_father_1900_byr_bys) ///
			+ ((`past'/40)*log_father_1940_byors_altwgt) if age10==`yr'
	}

	/* age10 in 1941-1960: linear blend of the 1940 and 1960 scores */
	forvalues yr = 1941/1960 {
		local past  = `yr' - 1940   // years since the 1940 anchor
		local ahead = 20 - `past'   // years until the 1960 anchor

		replace log_father_interp_altwgt = ((`ahead'/20)*log_father_1940_byors_altwgt) ///
			+ ((`past'/20)*log_father_1960_byors_altwgt) if age10==`yr'
	}

	/* age10 in 1961-1990: linear blend of the two decennial scores
	   that bracket the year the respondent turned 10 */
	forvalues lo = 1960(10)1980 {
		local hi    = `lo' + 10
		local first = `lo' + 1

		forvalues yr = `first'/`hi' {
			local past  = `yr' - `lo'   // years since the earlier Census
			local ahead = 10 - `past'   // years until the later Census

			replace log_father_interp_altwgt = ((`ahead'/10)*log_father_`lo'_byors_altwgt) ///
				+ ((`past'/10)*log_father_`hi'_byors_altwgt) if age10==`yr'
		}
	}

	// Anyone with a baseline interpolated score must also have this one
	assert log_father_interp_altwgt!=. if log_father_interpolated!=.
	label variable log_father_interp_altwgt "Logged father's income, interpolated for each decade, alternative weight"

*-----------------------------------*
*-----------------------------------*

*	5. Occupation x race x south---versions for Appendix C recall bias exercises
*	   Scores are assigned by birth decade rather than by age10.

	//version 1: blends for 1910, 1920 and 1940; single score otherwise
	generate log_father_interp_rbias_v1 = .
		replace log_father_interp_rbias_v1 = (1/2)*log_father_1900_byr_bys + (1/2)*log_father_HHinc_1936fix if decade==1910
		replace log_father_interp_rbias_v1 = (1/3)*log_father_1900_byr_bys + (2/3)*log_father_HHinc_1936fix if decade==1920
		replace log_father_interp_rbias_v1 = log_father_HHinc_1936fix if decade==1930
		replace log_father_interp_rbias_v1 = (1/2)*log_father_HHinc_1936fix + (1/2)*log_father_1960_byr_bys if decade==1940

		// 1950-1970 decades take the Census score 10 years after the decade
		forvalues d = 1950(10)1970 {
			local census = `d' + 10
			replace log_father_interp_rbias_v1 = log_father_`census'_byr_bys if decade==`d'
		}
	label variable log_father_interp_rbias_v1 "Logged father's income, interp, matches Census measure in recall bias ex, V1"

	//version 2: blends for 1910 and 1930; single score otherwise
	generate log_father_interp_rbias_v2 = .
		replace log_father_interp_rbias_v2 = (1/3)*log_father_1900_byr_bys + (2/3)*log_father_HHinc_1936fix if decade==1910
		replace log_father_interp_rbias_v2 = log_father_HHinc_1936fix if decade==1920
		replace log_father_interp_rbias_v2 = (1/2)*log_father_HHinc_1936fix + (1/2)*log_father_1960_byr_bys if decade==1930

		// 1940-1970 decades take the Census score 20 years after the decade
		forvalues d = 1940(10)1970 {
			local census = `d' + 20
			replace log_father_interp_rbias_v2 = log_father_`census'_byr_bys if decade==`d'
		}

	label variable log_father_interp_rbias_v2 "Logged father's income, interp, matches Census measure in recall bias ex, V2"

*-----------------------------------*
*-----------------------------------*

* 6. Occupation x south
*    Same blending scheme as the baseline, using occupation x south scores.

	// 1940 anchor score for this measure
	local measure "log_father_HHinc_1936fix_byo_bys"

	// Temporary copy of the 1940 anchor; dropped once the blend is built
	clonevar log_father_1940_bysouthonly = `measure'

	generate log_father_byoccsouth_interp = .

	/* age10 in 1920-1940: linear blend of the 1900 and 1940 scores */
	forvalues yr = 1920/1940 {
		local past  = `yr' - 1900   // years since the 1900 anchor
		local ahead = 40 - `past'   // years until the 1940 anchor

		replace log_father_byoccsouth_interp = ((`ahead'/40)*log_father_1900_bysouth) ///
			+ ((`past'/40)*log_father_1940_bysouthonly) if age10==`yr'
	}

	/* age10 in 1941-1960: linear blend of the 1940 and 1960 scores */
	forvalues yr = 1941/1960 {
		local past  = `yr' - 1940   // years since the 1940 anchor
		local ahead = 20 - `past'   // years until the 1960 anchor

		replace log_father_byoccsouth_interp = ((`ahead'/20)*log_father_1940_bysouthonly) ///
			+ ((`past'/20)*log_father_1960_byocc_bys) if age10==`yr'
	}

	/* age10 in 1961-1990: linear blend of the two decennial scores
	   that bracket the year the respondent turned 10 */
	forvalues lo = 1960(10)1980 {
		local hi    = `lo' + 10
		local first = `lo' + 1

		forvalues yr = `first'/`hi' {
			local past  = `yr' - `lo'   // years since the earlier Census
			local ahead = 10 - `past'   // years until the later Census

			replace log_father_byoccsouth_interp = ((`ahead'/10)*log_father_`lo'_byocc_bys) ///
				+ ((`past'/10)*log_father_`hi'_byocc_bys) if age10==`yr'
		}
	}

	// Note: no 1900 income score for non-working fathers, hence the fatheroccej!=99 condition
	assert log_father_byoccsouth_interp!=. if `measure'!=. & age10!=. & (fatheroccej!=. & fatheroccej!=99 & south_merge!=.)
	label variable log_father_byoccsouth_interp "Logged father's income, occ x south, interpolated for each decade"
	drop log_father_1940_bysouthonly

*-----------------------------------*
*-----------------------------------*
	
* 7. Race-only, south-only, and race x south levels
*    One pass per level. Each pass sets the output stub (name), the 1940
*    anchor variable, the variable suffix used for 1960-1990, and the text
*    inserted in the variable label (cell).

	foreach z in just_race just_south just_race_south  {

		noisily display "`z'"

		// Completeness condition checked after blending (identical for all three levels)
		local cond "race!=. & fatheroccej<99 & south_merge!=."

		if "`z'"=="just_race" {
			local name "just_race"
			local 1940measure "log_father_1940_byr_only"
			local suffix "byr_only"
			local cell "race only"
		}
		else if "`z'"=="just_south" {
			local name "just_south"
			local 1940measure "log_father_1940_bysouth"
			local suffix "bysouth"
			local cell "south only"
		}
		else {
			local name "just_rs"
			local 1940measure "log_father_1940_byr_bys_only"
			local suffix "byr_bys_only"
			local cell "race x south only"
		}

		// Temporary copy of the 1940 anchor; dropped at the end of the iteration
		clonevar log_father_1940_`z' = `1940measure'

		generate log_father_`name'_interp = .

		/* age10 in 1920-1940: linear blend of the 1900 and 1940 scores */
		forvalues yr = 1920/1940 {
			local past  = `yr' - 1900   // years since the 1900 anchor
			local ahead = 40 - `past'   // years until the 1940 anchor

			replace log_father_`name'_interp = ((`ahead'/40)*log_father_1900_`z') ///
				+ ((`past'/40)*log_father_1940_`z') if age10==`yr'
		}

		/* age10 in 1941-1960: linear blend of the 1940 and 1960 scores */
		forvalues yr = 1941/1960 {
			local past  = `yr' - 1940   // years since the 1940 anchor
			local ahead = 20 - `past'   // years until the 1960 anchor

			replace log_father_`name'_interp = ((`ahead'/20)*log_father_1940_`z') ///
				+ ((`past'/20)*log_father_1960_`suffix') if age10==`yr'
		}

		/* age10 in 1961-1990: linear blend of the two decennial scores
		   that bracket the year the respondent turned 10 */
		forvalues lo = 1960(10)1980 {
			local hi    = `lo' + 10
			local first = `lo' + 1

			forvalues yr = `first'/`hi' {
				local past  = `yr' - `lo'   // years since the earlier Census
				local ahead = 10 - `past'   // years until the later Census

				replace log_father_`name'_interp = ((`ahead'/10)*log_father_`lo'_`suffix') ///
					+ ((`past'/10)*log_father_`hi'_`suffix') if age10==`yr'
			}
		}

		assert log_father_`name'_interp!=. if `1940measure'!=. & age10!=. & `cond'
		label variable log_father_`name'_interp "Logged father's income, `cell', interpolated for each decade"
		drop log_father_1940_`z'

	}
	
************************************************************************************
/* SUPER IMPORTANT: EXCLUDE NON-WORKING DADS FROM ALL BLENDED MEASURES UP TO NOW */
************************************************************************************

	// All blended measures built so far, assembled in pieces for readability
	local blended_workdads "log_father_interpolated log_father_interpolated_CWfix log_father_personalinc_interp"
	local blended_workdads "`blended_workdads' log_father_closest_census log_father_closest_census_v2 log_father_interp_altwgt"
	local blended_workdads "`blended_workdads' log_father_interp_rbias_v1 log_father_interp_rbias_v2 log_father_byoccsouth_interp"
	local blended_workdads "`blended_workdads' log_father_just_race_interp log_father_just_south_interp log_father_just_rs_interp"

	// Blank out each measure for non-working fathers; summarize before and after as a visual check
	foreach v of local blended_workdads {

		summarize `v' if father_notworking==1, detail
		replace `v' = . if father_notworking==1
		summarize `v' if father_notworking==1

	}

/* Verify: respondents with a non-missing father occupation (code below 99),
           race, and south indicator have a non-missing value on every
           blended measure. */
	foreach v of local blended_workdads {
		assert `v'!=. if fatheroccej<99 & race!=. & south_merge!=.
	}
	
*****************************************
* MORE BLENDED FATHER INCOME SCORES
*****************************************

* 8. Occupation-only (household-level income)
*    Built twice: once with each candidate 1940 anchor score.

	foreach z in byocc_interp byocc_interp_CWfix {

		// 1940 anchor score that belongs to this version of the measure
		if "`z'"=="byocc_interp" {
			local measure "log_father_HHinc_1936fix_byocc"
		}
		else {
			local measure "log_father_1940occ_CWfix"
		}

		// Temporary copy of the 1940 anchor; dropped at the end of the iteration
		clonevar log_father_1940occ = `measure'

		generate log_father_`z' = .

		/* age10 in 1920-1940: linear blend of the 1900 and 1940 scores */
		forvalues yr = 1920/1940 {
			local past  = `yr' - 1900   // years since the 1900 anchor
			local ahead = 40 - `past'   // years until the 1940 anchor

			replace log_father_`z' = ((`ahead'/40)*log_father_1900_byocc) ///
				+ ((`past'/40)*log_father_1940occ) if age10==`yr'
		}

		/* age10 in 1941-1960: linear blend of the 1940 and 1960 scores */
		forvalues yr = 1941/1960 {
			local past  = `yr' - 1940   // years since the 1940 anchor
			local ahead = 20 - `past'   // years until the 1960 anchor

			replace log_father_`z' = ((`ahead'/20)*log_father_1940occ) ///
				+ ((`past'/20)*log_father_1960occ) if age10==`yr'
		}

		/* age10 in 1961-1990: linear blend of the two decennial scores
		   that bracket the year the respondent turned 10 */
		forvalues lo = 1960(10)1980 {
			local hi    = `lo' + 10
			local first = `lo' + 1

			forvalues yr = `first'/`hi' {
				local past  = `yr' - `lo'   // years since the earlier Census
				local ahead = 10 - `past'   // years until the later Census

				replace log_father_`z' = ((`ahead'/10)*log_father_`lo'occ) ///
					+ ((`past'/10)*log_father_`hi'occ) if age10==`yr'
			}
		}

		// Note: no 1900 income score for non-working dads, hence the fatheroccej!=99 condition
		assert log_father_`z'!=. if `measure'!=. & age10!=. & (fatheroccej!=. & fatheroccej!=99 & race!=. & south_merge!=.)
		label variable log_father_`z' "Logged father's income, by occ, interpolated for each decade"
		drop log_father_1940occ
	}

	// These measures differ for early cohorts because of CW fixes.
	assert log_father_byocc_interp == log_father_byocc_interp_CWfix if age10>=1960 & age10<. & race!=.

*-----------------------------------*
*-----------------------------------*

* 9. Occupation-only (individual-level income)
*    Same blending scheme, using the owninc/inctot occupation scores.

	// Temporary copy of the 1940 anchor; dropped once the blend is built
	clonevar log_father_1940occ = log_father_owninc_1936fix_byocc

	generate log_father_byocc_personal = .

	/* age10 in 1920-1940: linear blend of the 1900 and 1940 scores */
	forvalues yr = 1920/1940 {
		local past  = `yr' - 1900   // years since the 1900 anchor
		local ahead = 40 - `past'   // years until the 1940 anchor

		replace log_father_byocc_personal = ((`ahead'/40)*log_father_1900_byocc) ///
			+ ((`past'/40)*log_father_1940occ) if age10==`yr'
	}

	/* age10 in 1941-1960: linear blend of the 1940 and 1960 scores */
	forvalues yr = 1941/1960 {
		local past  = `yr' - 1940   // years since the 1940 anchor
		local ahead = 20 - `past'   // years until the 1960 anchor

		replace log_father_byocc_personal = ((`ahead'/20)*log_father_1940occ) ///
			+ ((`past'/20)*log_father_inctot1960_occ) if age10==`yr'
	}

	/* age10 in 1961-1990: linear blend of the two decennial scores
	   that bracket the year the respondent turned 10 */
	forvalues lo = 1960(10)1980 {
		local hi    = `lo' + 10
		local first = `lo' + 1

		forvalues yr = `first'/`hi' {
			local past  = `yr' - `lo'   // years since the earlier Census
			local ahead = 10 - `past'   // years until the later Census

			replace log_father_byocc_personal = ((`ahead'/10)*log_father_inctot`lo'_occ) ///
				+ ((`past'/10)*log_father_inctot`hi'_occ) if age10==`yr'
		}
	}

	// Coverage must match the household-level occupation-only measure
	assert log_father_byocc_personal!=. if log_father_byocc_interp!=. & age10!=. & (fatheroccej!=. & fatheroccej!=99 & race!=. & south_merge!=.)
	label variable log_father_byocc_personal "Logged father's personal income, by occ, interpolated for each decade"
	drop log_father_1940occ
		
*-----------------------------------*
*-----------------------------------*

* 10. Occupation x race
*     Built twice: once with each candidate 1940 anchor score.

	foreach z in byrace_interp byrace_interp_CWfix {

		// 1940 anchor score that belongs to this version of the measure
		if "`z'"=="byrace_interp" {
			local measure "log_father_HHinc_1936fix_byo_byr"
		}
		else {
			local measure "log_father_1940_byrace_CWfix"
		}

		// Temporary copy of the 1940 anchor; dropped at the end of the iteration
		clonevar log_father_1940_byrace = `measure'

		generate log_father_`z' = .

		/* age10 in 1920-1940: linear blend of the 1900 and 1940 scores */
		forvalues yr = 1920/1940 {
			local past  = `yr' - 1900   // years since the 1900 anchor
			local ahead = 40 - `past'   // years until the 1940 anchor

			replace log_father_`z' = ((`ahead'/40)*log_father_1900_byrace) ///
				+ ((`past'/40)*log_father_1940_byrace) if age10==`yr'
		}

		/* age10 in 1941-1960: linear blend of the 1940 and 1960 scores */
		forvalues yr = 1941/1960 {
			local past  = `yr' - 1940   // years since the 1940 anchor
			local ahead = 20 - `past'   // years until the 1960 anchor

			replace log_father_`z' = ((`ahead'/20)*log_father_1940_byrace) ///
				+ ((`past'/20)*log_father_1960_byrace) if age10==`yr'
		}

		/* age10 in 1961-1990: linear blend of the two decennial scores
		   that bracket the year the respondent turned 10 */
		forvalues lo = 1960(10)1980 {
			local hi    = `lo' + 10
			local first = `lo' + 1

			forvalues yr = `first'/`hi' {
				local past  = `yr' - `lo'   // years since the earlier Census
				local ahead = 10 - `past'   // years until the later Census

				replace log_father_`z' = ((`ahead'/10)*log_father_`lo'_byrace) ///
					+ ((`past'/10)*log_father_`hi'_byrace) if age10==`yr'
			}
		}

		assert log_father_`z'!=. if `measure'!=. & age10!=. & (fatheroccej!=. & fatheroccej!=99 & race!=. & south_merge!=.)
		label variable log_father_`z' "Logged father's income, occ x race, interpolated for each decade"
		drop log_father_1940_byrace

	}

	// These measures differ for early cohorts because of CW fixes.
	assert log_father_byrace_interp == log_father_byrace_interp_CWfix if age10>=1960 & age10<.

*-----------------------------------*
*-----------------------------------*

* 11. Occupation x race x region
*     Same blending scheme, using the *_byreg scores.

	generate log_father_byregion_interp = .

	/* age10 in 1920-1940: linear blend of the 1900 and 1940 scores */
	forvalues yr = 1920/1940 {
		local past  = `yr' - 1900   // years since the 1900 anchor
		local ahead = 40 - `past'   // years until the 1940 anchor

		replace log_father_byregion_interp = ((`ahead'/40)*log_father_1900_byr_byreg) ///
			+ ((`past'/40)*log_father_1940_byreg) if age10==`yr'
	}

	/* age10 in 1941-1960: linear blend of the 1940 and 1960 scores */
	forvalues yr = 1941/1960 {
		local past  = `yr' - 1940   // years since the 1940 anchor
		local ahead = 20 - `past'   // years until the 1960 anchor

		replace log_father_byregion_interp = ((`ahead'/20)*log_father_1940_byreg) ///
			+ ((`past'/20)*log_father_1960_byreg) if age10==`yr'
	}

	/* age10 in 1961-1990: linear blend of the two decennial scores
	   that bracket the year the respondent turned 10 */
	forvalues lo = 1960(10)1980 {
		local hi    = `lo' + 10
		local first = `lo' + 1

		forvalues yr = `first'/`hi' {
			local past  = `yr' - `lo'   // years since the earlier Census
			local ahead = 10 - `past'   // years until the later Census

			replace log_father_byregion_interp = ((`ahead'/10)*log_father_`lo'_byreg) ///
				+ ((`past'/10)*log_father_`hi'_byreg) if age10==`yr'
		}
	}

	// Every observation with a 1940 region score must have a blended value
	assert log_father_byregion_interp!=. if log_father_1940_byreg!=. 
	label variable log_father_byregion_interp "Logged father's income, by race by region, interpolated for each decade"

	// Complete for everyone with race, a father occupation code below 99, and region
	assert log_father_byregion_interp!=. if race!=. & fatheroccej<99 & region_merge!=.

*-----------------------------------*
*-----------------------------------*

* 12. Occupation x race x south x edu 
*     Same blending scheme, using the *_byors_edu scores from 1940 onward.

	generate log_father_byors_edu_interp = .

	/* age10 in 1920-1940: linear blend of the 1900 and 1940 scores */
	forvalues yr = 1920/1940 {
		local past  = `yr' - 1900   // years since the 1900 anchor
		local ahead = 40 - `past'   // years until the 1940 anchor

		replace log_father_byors_edu_interp = ((`ahead'/40)*log_father_1900_byr_bys_edu_v2) ///
			+ ((`past'/40)*log_father_1940_byors_edu) if age10==`yr'
	}

	/* age10 in 1941-1960: linear blend of the 1940 and 1960 scores */
	forvalues yr = 1941/1960 {
		local past  = `yr' - 1940   // years since the 1940 anchor
		local ahead = 20 - `past'   // years until the 1960 anchor

		replace log_father_byors_edu_interp = ((`ahead'/20)*log_father_1940_byors_edu) ///
			+ ((`past'/20)*log_father_1960_byors_edu) if age10==`yr'
	}

	/* age10 in 1961-1990: linear blend of the two decennial scores
	   that bracket the year the respondent turned 10 */
	forvalues lo = 1960(10)1980 {
		local hi    = `lo' + 10
		local first = `lo' + 1

		forvalues yr = `first'/`hi' {
			local past  = `yr' - `lo'   // years since the earlier Census
			local ahead = 10 - `past'   // years until the later Census

			replace log_father_byors_edu_interp = ((`ahead'/10)*log_father_`lo'_byors_edu) ///
				+ ((`past'/10)*log_father_`hi'_byors_edu) if age10==`yr'
		}
	}

	// Every observation with a 1940 edu score and a known age10 must have a blended value
	assert log_father_byors_edu_interp!=. if log_father_1940_byors_edu!=. & age10!=.  
	label variable log_father_byors_edu_interp "Logged father's income, by race, south, and edu, interpolated for each decade"

*-----------------------------------*
*-----------------------------------*

* 13. Equivalized blended income (occ x race x south level)

	// Bring in median family size at age 10 (from the Census)
	merge m:1 age10 fatheroccej race south_merge using "./1_DataSources/CensusData/output/MedianFamilySize_byage10.dta"

	// Unmatched survey rows are allowed only when a merge key is missing or father occupation is 99
	assert inlist(fatheroccej, ., 99) | race==. | south_merge==. | age10==. if _merge==1

	// Keep survey rows only; discard Census cells that matched no respondent
	keep if inlist(_merge, 1, 3)
	drop _merge

	// Undo the log so that income can be divided by family size
	generate exp_interpolated_income = exp(log_father_interpolated)

	// Equivalize: divide income by the square root of family size
	generate father_baseline_equivalized = exp_interpolated_income / sqrt(number_member_family)
	label variable father_baseline_equivalized "Interpolated father income / by sqrt(median fam size)"

	// Back to logs
	generate log_father_equivalized = ln(father_baseline_equivalized)
	label variable log_father_equivalized "Log of equivalized father income"

*-----------------------------------*
*-----------------------------------*
	
* 14. Occupation x race x south (baseline + non-working fathers)
*     Rebuilds the baseline blend without blanking out non-working fathers.

	// Temporary copy of the 1940 anchor; dropped once the blend is built
	clonevar log_father_1940_byr_bys = log_father_HHinc_1936fix

	generate father_baseline_plus_notworking = .

	/* age10 in 1920-1940: linear blend of the 1900 and 1940 scores */
	forvalues yr = 1920/1940 {
		local past  = `yr' - 1900   // years since the 1900 anchor
		local ahead = 40 - `past'   // years until the 1940 anchor

		replace father_baseline_plus_notworking = ((`ahead'/40)* log_father_1900_byr_bys) ///
			+ ((`past'/40)*log_father_1940_byr_bys) if age10==`yr'
	}

	/* age10 in 1941-1960: linear blend of the 1940 and 1960 scores */
	forvalues yr = 1941/1960 {
		local past  = `yr' - 1940   // years since the 1940 anchor
		local ahead = 20 - `past'   // years until the 1960 anchor

		replace father_baseline_plus_notworking = ((`ahead'/20)*log_father_1940_byr_bys) ///
			+ ((`past'/20)*log_father_1960_byr_bys) if age10==`yr'
	}

	/* age10 in 1961-1990: linear blend of the two decennial scores
	   that bracket the year the respondent turned 10 */
	forvalues lo = 1960(10)1980 {
		local hi    = `lo' + 10
		local first = `lo' + 1

		forvalues yr = `first'/`hi' {
			local past  = `yr' - `lo'   // years since the earlier Census
			local ahead = 10 - `past'   // years until the later Census

			replace father_baseline_plus_notworking = ((`ahead'/10)*log_father_`lo'_byr_bys) ///
				+ ((`past'/10)*log_father_`hi'_byr_bys) if age10==`yr'
		}
	}

	// Every observation with a 1940 anchor must have received a blended value
	assert father_baseline_plus_notworking!=. if log_father_1940_byr_bys!=.
	label variable father_baseline_plus_notworking "Logged father's baseline (interpolated) income, adding non-working fathers"
	drop log_father_1940_byr_bys

	// The only rows allowed to differ from the baseline are fathers with occupation code 99
	assert fatheroccej==99 if father_baseline_plus_notworking!=log_father_interpolated 

*------------------------------------------------------------------------------*
*------------------------------------------------------------------------------*

************************************
* RENAME SOME BLENDED MEASURES 
************************************

	// Copies of the blended measures under father_income_* names.
	// Variables are created in the original order; labels are attached afterwards.

	//baseline: occ x race x south (plus a log_father_baseline clone)
	generate father_income_baseline = log_father_interpolated
	clonevar log_father_baseline = father_income_baseline

	//other levels: occupation, occ x race, occ x race x region, occ x race x south x edu
	generate father_income_byocc    = log_father_byocc_interp
	generate father_income_byrace   = log_father_byrace_interp
	generate father_income_byregion = log_father_byregion_interp
	generate father_income_byedu    = log_father_byors_edu_interp

	//labels
	label variable father_income_baseline "Father income, (logged) baseline measure"
	label variable log_father_baseline "Logged father income, baseline measure (clone of father_income_baseline)"
	label variable father_income_byocc "Logged father income, occupation measure"
	label variable father_income_byrace "Logged father income, occ x race"
	label variable father_income_byregion "Logged father income, occ x race x region measure"
	label variable father_income_byedu "Logged father income, occ x race x south x edu measure"
	
*------------------------------------------------------------------------------*
*------------------------------------------------------------------------------*

***************************************************
* BLENDED INCOME SCORE THAT ADDS WORKING MOTHERS
***************************************************

	label variable motheroccej "Mother's coarsened occupation"

* Merge in Census income scores for mothers (keyed on occupation, race, south)
	merge m:1 motheroccej race south_merge using "./1_DataSources/CensusData/output/IncomeScores_Coarsened_byrace_bysouth_moms.dta"

	// Unmatched survey rows are allowed only when one of the merge keys is missing
	assert motheroccej==. | race==. | south_merge==. if _merge==1

	// Keep survey rows only; discard Census cells that matched no respondent
	keep if inlist(_merge, 1, 3)
	drop _merge

* Log mother income scores

	//1940 (CW fix version)
	generate log_mother_1940_byr_bys = ln(mom_HHinc_byr_bys_CWfix)
	label variable log_mother_1940_byr_bys "Logged mother's household income, 1940, by race by south, CW fix"

	//1960-1990, one variable per Census
	foreach census of numlist 1960(10)1990 {
		generate log_mother_`census'_byr_bys = ln(mom_avg_HHinc_`census'_byocc_rs)
		label variable log_mother_`census'_byr_bys "Logged mother's household income, `census', by race by south"
	}

* Create blended measure 
	generate log_mother_interpolated = .

	/* age10 up to 1940: use the 1940 score (CW fix) as is.
	   1900 measures for mothers are not microdata. */
	replace log_mother_interpolated = log_mother_1940_byr_bys if age10<=1940

	/* age10 in 1941-1960: linear blend of the 1940 and 1960 scores */
	forvalues yr = 1941/1960 {
		local past  = `yr' - 1940   // years since the 1940 anchor
		local ahead = 20 - `past'   // years until the 1960 anchor

		replace log_mother_interpolated = ((`ahead'/20)*log_mother_1940_byr_bys) ///
			+ ((`past'/20)*log_mother_1960_byr_bys) if age10==`yr'
	}

	/* age10 in 1961-1990: linear blend of the two decennial scores
	   that bracket the year the respondent turned 10 */
	forvalues lo = 1960(10)1980 {
		local hi    = `lo' + 10
		local first = `lo' + 1

		forvalues yr = `first'/`hi' {
			local past  = `yr' - `lo'   // years since the earlier Census
			local ahead = 10 - `past'   // years until the later Census

			replace log_mother_interpolated = ((`ahead'/10)*log_mother_`lo'_byr_bys) ///
				+ ((`past'/10)*log_mother_`hi'_byr_bys) if age10==`yr'
		}
	}

	// Every observation with a 1940 mother score and a known age10 must have a blended value
	assert log_mother_interpolated!=. if log_mother_1940_byr_bys!=. & age10!=.
	label variable log_mother_interpolated "Logged mother's household income (race x south), interp. 1940-1990, CW fix"

*------------------------------------------------------------------------------*
*------------------------------------------------------------------------------*

********************************************	
* BLENDED PARENTAL INCOME MEASURES
********************************************

	// Mother measure used as the fallback below
	local mom_inc "log_mother_interpolated"

/* 1. Start from the working-father baseline; where it is missing,
      fall back to the mother's blended income (incdata==1 rows only). */
	generate parent_income = father_income_baseline
	replace parent_income = `mom_inc' if incdata==1 & father_income_baseline==. & `mom_inc'!=.
	label variable parent_income "Parent baseline (logged) inc: mom inc. if dad occ. missing & mom occ. available"

/* 2. If neither working-father nor working-mother blended income is
      available, fall back to the measure that includes non-working fathers. */
	generate parent_income_all = parent_income
	replace parent_income_all = father_baseline_plus_notworking if incdata==1 & parent_income_all==. & father_baseline_plus_notworking!=.
	label variable parent_income_all "Baseline (logged) income for fathers, plus mothers and unemployed fathers"

/* 3. If parental income is still missing, assign a "0" to
      respondents with race code 1 or 2 (incdata==1 rows only). */
	generate parent_income_wzeros = parent_income_all
	replace parent_income_wzeros = 0 if incdata==1 & parent_income_all==. & inlist(race, 1, 2)
	label variable parent_income_wzeros "Parent income, missing replaced as 0"
	
*---------------------------------------------------------------------------------------------------------------------*
*---------------------------------------------------------------------------------------------------------------------*

***************************
* SAMPLES
***************************

* MAIN SAMPLE: baseline father income and respondent family income both non-missing
	generate baseline_sample = 1 if fam_inc_real!=. & father_income_baseline!=.

* Alternative samples (each flag is 1 when the row qualifies, missing otherwise):

	//1. Baseline sample minus respondents whose fathers are flagged as farmers
	generate nonfarm_sample = 1 if fatherfarm!=1 & baseline_sample==1

	//2. Father income by education is available
	generate edu_sample = 1 if fam_inc_real!=. & father_income_byedu!=.

	//3. Father income by region is available
	generate region_sample = 1 if fam_inc_real!=. & father_income_byregion!=.

	//4. Father income is available (working or non-working father)
	generate father_notworking_sample = 1 if fam_inc_real!=. & father_baseline_plus_notworking!=.

	//5. *Working* parental income is available (working fathers or working mothers)
	generate parent_sample = 1 if fam_inc_real!=. & parent_income!=.

	//6. Parental income is available (working father, working mother, or non-working father)
	generate parent_all_sample = 1 if fam_inc_real!=. & parent_income_all!=.

	//7. Parental income (+0s) available
	generate zeros_sample = 1 if fam_inc_real!=. & parent_income_wzeros!=.

	//8. Equivalized income (household size) available for both generations
	generate equivalized_sample = 1 if father_baseline_equivalized!=. & son_baseline_equivalized!=.

	//9. Nationally representative surveys within the baseline sample
	generate common_survey_sample = 1 if natrepdata==1 & baseline_sample==1

	//10. Baseline sample without the NFS
	generate non_nfs_sample = 1 if data!="nfs" & baseline_sample==1

	//11. Surveys whose sampling occurs at the household level
	generate hh_based_sample = 1 if baseline_sample==1 & inlist(data, "anes", "avtmh57", "avtmh76", "gss", "nfs", "nsba", "ocg62", "ocg73")

	//12. Baseline sample without the three most heavily top-coded datasets
	generate non_topcod_sample = 1 if baseline_sample==1 & !inlist(data, "nsfh", "nfs", "nlsyw68")

	// Stubs of all <stub>_sample flags defined above
	global samples "baseline nonfarm edu region zeros equivalized parent parent_all father_notworking common_survey non_nfs hh_based non_topcod" 

*----------------------------*
* DUMMIES
*----------------------------*

* Race x sex indicator for each of the four cells. Each triple is:
* new variable name, value of black, value of sex. The indicator is left
* missing whenever black or sex is missing.
	foreach cell in "white_male 0 1" "white_female 0 2" "nonw_male 1 1" "nonw_female 1 2" {
		local dname : word 1 of `cell'
		local bval  : word 2 of `cell'
		local sval  : word 3 of `cell'

		gen `dname' = (black==`bval' & sex==`sval') if (black!=. & sex!=.)
		tab `dname', m
	}
	
**********************************************************************************
** GENERATING CENSUS-BASED WEIGHTS TO ACCOUNT FOR NONREPRESENTATIVE SURVEYS **
**********************************************************************************

*------------------------------------*
* 1. Time-varying race x sex weights
*------------------------------------*

* Attach the Census race x sex shares, keyed on age40 (= decade + 40).
* assert(3) makes the merge error out unless every observation is matched.
	gen age40 = decade+40 
	merge m:1 age40 using "./1_DataSources/CensusData/output/Census_shares_byrace_bysex.dta", assert(3) nogen

* Parallel lists: k-th survey dummy <-> k-th Census share variable
	local rs_dummies "white_male white_female nonw_male nonw_female"
	local rs_census  "white_men_census white_women_census black_men_census black_women_census"

* One adjusted weight per sample: within each decade, every race x sex cell
* gets the factor (Census share / weighted survey share), which is then
* multiplied by the harmonized survey weight.
	foreach group in $samples {

		gen wgt_temp=. 

		forval y = 1910(10)1970 {

			* Store the four Census shares for this decade
			forval k = 1/4 {
				local cvar : word `k' of `rs_census'
				sum `cvar' if decade==`y'
				local cshare`k' = `r(mean)'
			}

			* Scale each Census share by the matching weighted survey share
			forval k = 1/4 {
				local dummy : word `k' of `rs_dummies'
				quietly sum `dummy' if decade==`y' & `group'_sample==1 [aw=weight_center] 
				replace wgt_temp=`cshare`k''/`r(mean)' if (`dummy'==1 & decade==`y' & `group'_sample==1)
			}

		}

		* Scale harmonized survey weight by wgt_temp
		gen wgt_sex_race_`group' = wgt_temp*weight_center
		drop wgt_temp
	}

* Log check: reweighted survey shares by sample, decade and cell
	foreach group in $samples {
		forval y = 1910(10)1970 {
			foreach dummy of local rs_dummies {
				sum `dummy' if decade==`y' & `group'_sample==1 [aw=wgt_sex_race_`group'] 
			}
		}
	}

* The baseline-sample weight gets the short name; Census shares are no longer needed
	rename wgt_sex_race_baseline wgt_sex_race
	drop `rs_census'
	
*-----------------------------------------*
* 2. Time-invariant race x sex weights
*-----------------------------------------*

* Fixed target shares used in every decade: .44 for each of the two white
* cells and .06 for each of the two black==1 cells (the four sum to 1).
	local tgt_white ".44"
	local tgt_black ".06"
	local rs_cells "white_male white_female nonw_male nonw_female"

	foreach group in baseline {

		gen wgt_temp=. 

		forval y = 1910(10)1970 {

			* First cell is summarized and tabulated noisily as a log check
			sum white_male if decade==`y' & `group'_sample==1 [aw=weight_center] 
			replace wgt_temp=`tgt_white'/`r(mean)' if (white_male==1 & decade==`y' & `group'_sample==1)
			tab wgt_temp, m

			quietly sum white_female if decade==`y' & `group'_sample==1 [aw=weight_center] 
			replace wgt_temp=`tgt_white'/`r(mean)' if (white_female==1 & decade==`y' & `group'_sample==1)

			quietly sum nonw_male if decade==`y' & `group'_sample==1 [aw=weight_center] 
			replace wgt_temp=`tgt_black'/`r(mean)' if (nonw_male==1 & decade==`y' & `group'_sample==1)

			quietly sum nonw_female if decade==`y' & `group'_sample==1 [aw=weight_center] 
			replace wgt_temp=`tgt_black'/`r(mean)' if (nonw_female==1 & decade==`y' & `group'_sample==1)

		}

		* Adjustment factor times the harmonized survey weight
		gen wgt_sex_race_`group' = wgt_temp*weight_center
		drop wgt_temp
	}

* Log check: reweighted shares by decade and cell
	foreach group in baseline {
		forval y = 1910(10)1970 {
			foreach cell of local rs_cells {
				sum `cell' if decade==`y' & `group'_sample==1 [aw=wgt_sex_race_`group'] 
			}
		}
	}

* Rename for simplicity
	rename wgt_sex_race_baseline wgt_sex_race_sameshares

*------------------------------------------------------*
* 3. Time-varying race x sex x edu x age weights
*------------------------------------------------------*

* Attach the Census race x sex x edu x age shares (key = decade + 40);
* assert(3) makes the merge error out unless every observation is matched.
	gen age40_racesex_eduage = decade+40 
	merge m:1 age40_racesex_eduage using "./1_DataSources/CensusData/output/Census_shares_byrace_bysex_byedu_byage.dta", assert(3) nogen

* Four age bins over ages 30-50; the first bin spans 6 years, the others 5
	gen age_bin = 1 if inrange(age,30,35)
	replace age_bin = 2 if inrange(age,36,40)
	replace age_bin = 3 if inrange(age,41,45)
	replace age_bin = 4 if inrange(age,46,50)
	tab age_bin, m

* Dummies are left missing when any defining characteristic is missing
	local condit "if (black~=. & sex~=. & hs_ed~=. & age_bin~=.)"

* Dummies for sex-race-age-edu characteristics of respondent.
* The name of every dummy is collected in vars, in creation order.
	local vars ""

	foreach r in 0 1 {

		if `r'==0 {
			local r_lab "white"
		}
		else {
			local r_lab "nonw"
		}

		foreach s in 1 2 {

			if `s'==1 {
				local s_lab "_male"
			}
			else {
				local s_lab "_female"
			}

			foreach edu_l in 0 1 {

				if `edu_l'==0 {
					local ed "_no_hsed"
				}
				else {
					local ed "_hsed"
				}

				foreach age_b in 1 2 3 4 {
					local cellvar "`r_lab'`s_lab'`ed'_`age_b'"
					gen `cellvar' = (black==`r' & sex==`s' & hs_ed==`edu_l' & age_bin==`age_b') `condit'
					local vars "`vars' `cellvar'"
				}

			}
		}
	}

* Make shares + adjusted weight: within each decade, each cell gets
* (Census share / weighted survey share) times the harmonized survey weight
	foreach group in baseline {

		gen wgt_temp=. 

		forval y = 1910(10)1970 {

			foreach v of local vars {

				* Census share of the cell in this decade
				sum `v'_census if decade==`y'
				local cshare = `r(mean)'

				* Weighted survey share of the cell in this decade
				quietly summ `v' if decade==`y' & `group'_sample==1 [aw=weight_center] 
				replace wgt_temp = `cshare'/`r(mean)' if (`v'==1 & decade==`y' & `group'_sample==1)
			}

		}

		gen wgt_sex_race_eduage_`group' = wgt_temp*weight_center
		drop wgt_temp
	}

* Short name for the baseline weight; drop the cell dummies, their Census
* shares and the age bins; the new weight must be missing wherever the
* race x sex weight is missing
	rename wgt_sex_race_eduage_baseline wgt_sex_race_eduage
	drop *male_no* *male_hs* age_bin 
	assert  wgt_sex_race_eduage==. if wgt_sex_race==.
		
*-------------------------------------------------------------------------------------------------------------------------------*
*-------------------------------------------------------------------------------------------------------------------------------*

***************************************
*** RANKS---MANY VARIATIONS 
***************************************

/* BASELINE SPECIFICATION: Rank total family income and 
                           father predicted income at 
                           the preferred occ x race x south 
                           level. Weights adjusted for sex 
                           x race shares in the Census.
*/
* Number of baseline-sample observations in each birth year
	bysort dob: egen N_byDOB_baselinesample = sum(baseline_sample) 

	foreach var of varlist fam_inc_real father_income_baseline { 

		* Stub shared by the rank and ysel variables built from this input
		if "`var'" == "fam_inc_real" {
			local stub "son_baseline"
		}
		else if "`var'" == "father_income_baseline" {
			local stub "father_baseline"
		}
		local rname0 "rank_`stub'"
		local ysel0 "ysel_`stub'"

		* Weighted percentile (100 quantiles) within birth year, baseline sample
		egen `rname0' = xtile(`var') if baseline_sample==1 & inrange(dob,1910,1979), ///
			by(dob) nq(100) weight(wgt_sex_race)
		* No ranks for birth years with fewer than 100 sample observations
		replace `rname0' =. if N_byDOB_baselinesample<100

		* ysel holds the birth year wherever a rank exists
		quietly generate `ysel0' = .
		replace `ysel0' = dob if `rname0'!=.

	}

* A rank must never exist where the ranked variable is missing
	assert rank_son_baseline==. if fam_inc_real==. 
	assert rank_father_baseline==. if father_income_baseline==. 

	label var rank_son_baseline "Rank son, family income, baseline"
	label var rank_father_baseline "Rank father, baseline"

*----------------------------------*
*----------------------------------*

/* Variation 1B: Baseline specification but with 
                 survey-provided weights   */

	foreach var of varlist fam_inc_real father_income_baseline { 

		* Stub shared by the rank and ysel variables built from this input
		if "`var'" == "fam_inc_real" {
			local stub "son_adjweight"
		}
		else if "`var'" == "father_income_baseline" {
			local stub "father_adjweight"
		}
		local rname1 "rank_`stub'"
		local ysel1 "ysel_`stub'"

		* Same ranking as the baseline, weighted by weight_center instead
		egen `rname1' = xtile(`var') if baseline_sample==1 & inrange(dob,1910,1979), ///
			by(dob) nq(100) weight(weight_center)
		* No ranks for birth years with fewer than 100 sample observations
		replace `rname1' =. if N_byDOB_baselinesample<100

		* ysel holds the birth year wherever a rank exists
		quietly generate `ysel1' = .
		replace `ysel1' = dob if `rname1'!=.

	}

* A rank must never exist where the ranked variable is missing
	assert rank_son_adjweight==. if fam_inc_real==. 
	assert rank_father_adjweight==. if father_income_baseline==. 

	label var rank_son_adjweight "Rank son, family income, survey weight"
	label var rank_father_adjweight "Rank father, survey weight"
	
*----------------------------------*
*----------------------------------*

/* Variation 1C: Baseline specification but without weights */

	foreach var of varlist fam_inc_real father_income_baseline { 

		* Stub shared by the rank and ysel variables built from this input
		if "`var'" == "fam_inc_real" {
			local stub "son_noweight"
		}
		else if "`var'" == "father_income_baseline" {
			local stub "father_noweight"
		}
		local rname1 "rank_`stub'"
		local ysel1 "ysel_`stub'"

		* Unweighted percentile within birth year, baseline sample
		egen `rname1' = xtile(`var') if baseline_sample==1 & inrange(dob,1910,1979), ///
			by(dob) nq(100) 
		* No ranks for birth years with fewer than 100 sample observations
		replace `rname1' =. if N_byDOB_baselinesample<100

		* ysel holds the birth year wherever a rank exists
		quietly generate `ysel1' = .
		replace `ysel1' = dob if `rname1'!=.

	}

* A rank must never exist where the ranked variable is missing
	assert rank_son_noweight==. if fam_inc_real==. 
	assert rank_father_noweight==. if father_income_baseline==. 

	label var rank_son_noweight "Rank son, family income, no weight"
	label var rank_father_noweight "Rank father, no weight"
	
/* Variation 1D: Baseline specification but with weights 
				 adjusted for sex x race x edu x age 
				 shares in the Census  */

	foreach var of varlist fam_inc_real father_income_baseline { 

		* Stub shared by the rank and ysel variables built from this input
		if "`var'" == "fam_inc_real" {
			local stub "son_baseline_rsex_eduage"
		}
		else if "`var'" == "father_income_baseline" {
			local stub "father_baseline_rsex_eduage"
		}
		local rname0 "rank_`stub'"
		local ysel0 "ysel_`stub'"

		* Baseline ranking, weighted by wgt_sex_race_eduage instead
		egen `rname0' = xtile(`var') if baseline_sample==1 & inrange(dob,1910,1979), ///
			by(dob) nq(100) weight(wgt_sex_race_eduage)
		* No ranks for birth years with fewer than 100 sample observations
		replace `rname0' =. if N_byDOB_baselinesample<100

		* ysel holds the birth year wherever a rank exists
		quietly generate `ysel0' = .
		replace `ysel0' = dob if `rname0'!=.

	}

* A rank must never exist where the ranked variable is missing
	assert rank_son_baseline_rsex_eduage==. if fam_inc_real==. 
	assert rank_father_baseline_rsex_eduage==. if father_income_baseline==. 

	label var rank_son_baseline_rsex_eduage "Rank son, family income, weight adjusted for race x sex x ed x age Census shares"
	label var rank_father_baseline_rsex_eduage "Rank father, weight adjusted for race x sex x ed x age Census shares"

*----------------------------------*
*----------------------------------*

/* Variation 2: Ranking various measures of income on 
			    sample of respondents with available 
			    father income by education level.

   Creates, for each input variable, a rank_<stub> variable (weighted
   percentile within birth year on edu_sample, weight wgt_sex_race_edu)
   and a ysel_<stub> variable equal to dob wherever the rank exists.
*/

* Number of edu-sample observations in each birth year
bysort dob: egen N_byDOB_edusample = sum(edu_sample) 

	foreach var of varlist fam_inc_real father_income_baseline ///
	log_father_interpolated_CWfix log_father_byocc_interp_CWfix log_father_byrace_interp_CWfix log_father_byors_edu_interp {

		* Stub shared by the rank and ysel variables built from this input
		if "`var'" == "fam_inc_real" {
			local stub "son_edusample"
		}
		else if "`var'" == "father_income_baseline" {
			local stub "father_baseline_edusample"
		}
		else if "`var'" == "log_father_interpolated_CWfix" {
			local stub "father_CW_edusample"
		}
		else if "`var'" == "log_father_byors_edu_interp" {
			local stub "father_byr_bys_edu"
		}
		else if "`var'" == "log_father_byocc_interp_CWfix" {
			local stub "father_byocc_edusample"
		}
		else if "`var'" == "log_father_byrace_interp_CWfix" {
			local stub "father_byrace_edusample"
		}
		local rname3 "rank_`stub'"
		local ysel3 "ysel_`stub'"

		* Weighted percentile (100 quantiles) within birth year, edu sample
		egen `rname3' = xtile(`var') if inrange(dob,1910,1979) & edu_sample==1, by(dob) nq(100) weight(wgt_sex_race_edu)
		* No ranks for birth years with fewer than 100 sample observations
		replace `rname3' =. if N_byDOB_edusample<100

		* ysel holds the birth year wherever a rank exists
		qui:gen `ysel3'=.
		replace `ysel3'= dob if `rname3'!=.

	}

* No rank may exist outside the edu sample. All six ranks are checked;
* rank_father_CW_edusample was previously the only one left unchecked.
	assert rank_son_edusample==. if edu_sample==. 
	assert rank_father_byr_bys_edu==. if edu_sample==. 
	assert rank_father_baseline_edusample==. if edu_sample==.
	assert rank_father_CW_edusample==. if edu_sample==.
	assert rank_father_byocc_edusample==. if edu_sample==. 
	assert rank_father_byrace_edusample==. if edu_sample==. 

	label var rank_son_edusample "Rank son, family income, edu. sample"
	label var rank_father_byr_bys_edu "Rank father, occ by race by south by edu., edu. sample"

	label var rank_father_baseline_edusample "Rank father, baseline income, edu. sample"
	label var rank_father_CW_edusample "Rank father, occ x race x south, CW fix, edu. sample"
	label var rank_father_byocc_edusample "Rank father, occ. only with CW fix, edu. sample"
	label var rank_father_byrace_edusample "Rank father, occ x race with CW fix, edu. sample"
	
*----------------------------------*
*----------------------------------*

/* Variation 3: Ranking alternate versions of father predicted 
                income on the baseline sample */  

	foreach var of varlist avg_occscore_1950_byocc log_father_closest_census_v2  log_father_closest_census log_father_interp_altwgt ///
	father_income_byocc log_father_byocc_personal father_income_byrace log_father_personalinc_interp log_father_interpolated_CWfix   { 

		* Stub shared by the rank and ysel variables built from this input
		if "`var'" == "avg_occscore_1950_byocc" {
			local stub "father_1950occscore"
		}
		else if "`var'" == "log_father_closest_census" {
			local stub "father_closest_census"
		}
		else if "`var'" == "log_father_closest_census_v2" {
			local stub "father_closest_census_v2"
		}
		else if "`var'" == "log_father_interpolated_CWfix" {
			local stub "father_interp_CWfix"
		}
		else if "`var'" == "log_father_personalinc_interp" {
			local stub "father_personalinc"
		}
		else if "`var'" == "log_father_interp_altwgt" {
			local stub "father_byr_bys_altwgt"
		}
		else if "`var'" == "father_income_byocc" {
			local stub "father_interp_occ"
		}
		else if "`var'" == "log_father_byocc_personal" {
			local stub "father_personalinc_occ"
		}
		else if "`var'" == "father_income_byrace" {
			local stub "father_interp_byr"
		}
		local rname4 "rank_`stub'"
		local ysel4 "ysel_`stub'"

		* Each alternate measure must be non-missing on the whole baseline sample
		count if `var'==. & baseline_sample==1
		assert `r(N)'==0 

		* Weighted percentile (100 quantiles) within birth year, baseline sample
		egen `rname4' = xtile(`var') if baseline_sample==1 & inrange(dob,1910,1979), ///
			by(dob) nq(100) weight(wgt_sex_race)
		* No ranks for birth years with fewer than 100 sample observations
		replace `rname4' =. if  N_byDOB_baselinesample<100

		* ysel holds the birth year wherever a rank exists
		quietly generate `ysel4' = .
		replace `ysel4' = dob if `rname4'!=.
		tab `ysel4',m 
	}

* Per measure: the rank is missing wherever the measure is missing; then label it
	assert rank_father_1950occscore==. if avg_occscore_1950_byocc==. 
	label var rank_father_1950occscore "Rank father, 1950 IPUMS occscore, baseline sample"

	assert rank_father_closest_census==. if log_father_closest_census==.
	label var rank_father_closest_census "Rank father, baseline sample, using closest Census to age 16"

	assert rank_father_closest_census_v2==. if log_father_closest_census_v2==.
	label var rank_father_closest_census_v2 "Rank father, baseline sample, using closest Census to cohort"

	assert rank_father_personalinc==. if log_father_personalinc_interp==. 
	label var rank_father_personalinc "Rank father, baseline sample, interp., personal inc."

	assert rank_father_personalinc_occ==. if log_father_byocc_personal==. 
	label var rank_father_personalinc_occ "Rank father, baseline sample, personal inc., occ only"

	assert rank_father_interp_CWfix==. if log_father_interpolated_CWfix==. 
	label var rank_father_interp_CWfix "Rank father, baseline sample, interp., CW fix in 1940"	

	assert rank_father_byr_bys_altwgt==. if log_father_interp_altwgt==. 
	label var rank_father_byr_bys_altwgt "Rank father, baseline sample, interp., alt. weight"	

	assert rank_father_interp_occ==. if father_income_byocc==. 
	label var rank_father_interp_occ "Rank father, baseline sample, interp., just with occ."

	assert rank_father_interp_byr==. if father_income_byrace==. 
	label var rank_father_interp_byr "Rank father, baseline sample, interp., occ by race"
	
*----------------------------------*
*----------------------------------*

/* Variation 4: Rank total family income and parental predicted
                income (working fathers, non-working fathers,
                and working mothers) on sample where missing 
                parental predicted income is replaced as 0.  
*/
* Number of zeros-sample observations in each birth year
	bysort dob: egen N_byDOB_zerossample = sum(zeros_sample) 

	foreach var of varlist parent_income_wzeros fam_inc_real { 

		* Stub shared by the rank and ysel variables built from this input
		if "`var'" == "fam_inc_real" {
			local stub "son_zeros_parent"
		}
		else if "`var'" == "parent_income_wzeros" {
			local stub "parent_zeros"
		}
		local rname5 "rank_`stub'"
		local ysel5 "ysel_`stub'"

		* Weighted percentile (100 quantiles) within birth year, zeros sample
		egen `rname5' = xtile(`var') if zeros_sample==1 & inrange(dob,1910,1979), ///
			by(dob) nq(100) weight(wgt_sex_race_zeros)
		* No ranks for birth years with fewer than 100 sample observations
		replace `rname5' =. if N_byDOB_zerossample<100

		* ysel holds the birth year wherever a rank exists
		quietly generate `ysel5' = .
		replace `ysel5' = dob if `rname5'!=.

	}

* A rank must never exist where the ranked variable is missing
	assert rank_son_zeros_parent==. if fam_inc_real==. 
	assert rank_parent_zeros==. if parent_income_wzeros==. 

	label var rank_son_zeros_parent "Rank son, family income, sample with father missings replaced as 0 (incdata only)"
	label var rank_parent_zeros "Rank parent, sample with father & mother missings replaced as 0 (incdata only)"

*----------------------------------*
*----------------------------------*

/* Variation 5: Rank total family income and father predicted 
 			    income on sample that excludes fathers working
 			    in farm occupations.  */

* Number of nonfarm-sample observations in each birth year
	bysort dob: egen N_byDOB_nonfarmsample = sum(nonfarm_sample) 

	foreach var of varlist fam_inc_real father_income_baseline { 

		* Stub shared by the rank and ysel variables built from this input
		if "`var'" == "fam_inc_real" {
			local stub "son_baseline_nonfarm"
		}
		else if "`var'" == "father_income_baseline" {
			local stub "father_baseline_nonfarm"
		}
		local rname6 "rank_`stub'"
		local ysel6 "ysel_`stub'"

		* Weighted percentile (100 quantiles) within birth year, nonfarm sample
		egen `rname6' = xtile(`var') if nonfarm_sample==1 & inrange(dob,1910,1979), ///
			by(dob) nq(100) weight(wgt_sex_race_nonfarm)
		* No ranks for birth years with fewer than 100 sample observations
		replace `rname6' =. if N_byDOB_nonfarmsample<100

		* ysel holds the birth year wherever a rank exists
		quietly generate `ysel6' = .
		replace `ysel6' = dob if `rname6'!=.

	}

* No rank may exist outside the nonfarm sample
	assert rank_son_baseline_nonfarm==. if nonfarm_sample==. 
	assert rank_father_baseline_nonfarm==. if nonfarm_sample==. 

	label var rank_son_baseline_nonfarm "Rank son, family income, baseline but nonfarm sample"
	label var rank_father_baseline_nonfarm "Rank father, baseline but nonfarm sample"
	
*----------------------------------*
*----------------------------------*

/* Variation 6: Rank total family income and measures of
				parental predicted income on expanded samples 
				(i.e, baseline + respondents w/ available mother 
				predicted income, baseline + respondents w/ unemployed
				father predicted income, and baseline + mother income + 
				unemployed father income) */

* Number of observations per birth year in each expanded sample
	sort dob
	by dob: egen N_byDOB_father_notworking_sample = sum(father_notworking_sample)
	by dob: egen N_byDOB_parent_sample = sum(parent_sample) 
	by dob: egen N_byDOB_parent_all_sample = sum(parent_all_sample) 

	foreach x in father_baseline_plus_notworking parent_income parent_income_all {

		* title = stub of the sample flag, weight and rank names for this measure
		if "`x'"=="father_baseline_plus_notworking" {
			local title "father_notworking"
		}
		else if "`x'"=="parent_income" {
			local title "parent"
		}
		else if "`x'"=="parent_income_all" {
			local title "parent_all"
		}

		foreach var of varlist fam_inc_real `x' { 

			* Family income gets the son names; the parental measure gets the bare title
			if "`var'" == "fam_inc_real" {
				local stub "son_`title'"
			}
			else if "`var'" == "`x'" {
				local stub "`title'"
			}
			local rname7 "rank_`stub'"
			local ysel7 "ysel_`stub'"

			* Weighted percentile (100 quantiles) within birth year on this sample
			egen `rname7' = xtile(`var') if `title'_sample==1 & inrange(dob,1910,1979), ///
				by(dob) nq(100) weight(wgt_sex_race_`title')
			* No ranks for birth years with fewer than 100 sample observations
			replace `rname7' =. if N_byDOB_`title'_sample<100

			* ysel holds the birth year wherever a rank exists
			quietly generate `ysel7' = .
			replace `ysel7' = dob if `rname7'!=.

		}
	}

* Per sample: ranks are missing wherever the ranked variable is missing; then label
	assert rank_son_father_notworking==. if fam_inc_real==. 
	assert rank_father_notworking==. if father_baseline_plus_notworking==. 

	label var rank_son_father_notworking "Rank son, family income, sample with unemployed dads"
	label var rank_father_notworking "Rank fathers, sample with unemployed dads"

	assert rank_son_parent==. if fam_inc_real==. 
	assert rank_parent==. if parent_income==. 

	label var rank_son_parent "Rank son, family income, sample with mother occ. when father missing"
	label var rank_parent "Rank parents, sample with mother occ. when father missing"

	assert rank_son_parent_all==. if fam_inc_real==.
	assert rank_parent_all==. if parent_income_all==. 

	label var rank_son_parent_all "Rank son, family income, sample with mother occ. and unemployed dads"
	label var rank_parent_all "Rank parents, sample with mother occ. and unemployed dads"

*----------------------------------*
*----------------------------------*

/* Variation 7: Rank various income measures on sample 
                of surveys with household size available */

* Number of equivalized-sample observations in each birth year
	bysort dob: egen N_byDOB_equivalizedsample = sum(equivalized_sample) 

	foreach var of varlist fam_inc_real son_baseline_equivalized father_income_baseline father_baseline_equivalized { 

		* Stub shared by the rank and ysel variables built from this input
		if "`var'" == "fam_inc_real" {
			local stub "son_eq_sample"
		}
		else if "`var'" == "father_income_baseline" {
			local stub "father_baseline_eq_sample"
		}
		else if "`var'" == "son_baseline_equivalized" {
			local stub "son_equivalized"
		}
		else if "`var'" == "father_baseline_equivalized" {
			local stub "father_equivalized"
		}
		local rname0 "rank_`stub'"
		local ysel0 "ysel_`stub'"

		* Weighted percentile (100 quantiles) within birth year, equivalized sample
		egen `rname0' = xtile(`var') if equivalized_sample==1 & inrange(dob,1910,1979), ///
			by(dob) nq(100) weight(wgt_sex_race_equivalized)
		* No ranks for birth years with fewer than 100 sample observations
		replace `rname0' =. if N_byDOB_equivalizedsample<100

		* ysel holds the birth year wherever a rank exists
		quietly generate `ysel0' = .
		replace `ysel0' = dob if `rname0'!=.

	}

* No rank may exist outside the equivalized sample
	assert rank_son_eq_sample==. if equivalized_sample==. 
	assert rank_father_baseline_eq_sample==. if equivalized_sample==. 
	assert rank_son_equivalized==. if equivalized_sample==. 
	assert rank_father_equivalized==. if equivalized_sample==. 

	label var rank_son_eq_sample "Rank son, family income, equivalized sample"
	label var rank_father_baseline_eq_sample "Rank father, baseline income, equivalized sample"
	label var rank_son_equivalized "Rank son, equivalized income and sample"
	label var rank_father_equivalized "Rank father, equivalized income and sample"
	
*----------------------------------*
*----------------------------------*
	
/* Variation 8: Rank total family income and father predicted 
 			    income on sample of nationally representative 
 			    surveys */

* Number of common-survey-sample observations in each birth year
bysort dob: egen N_byDOB_common = sum(common_survey_sample) 

	foreach var of varlist fam_inc_real father_income_baseline { 

		* Stub shared by the rank and ysel variables built from this input
		if "`var'" == "fam_inc_real" {
			local stub "son_baseline_com_surv"
		}
		else if "`var'" == "father_income_baseline" {
			local stub "father_baseline_com_surv"
		}
		local rname10 "rank_`stub'"
		local ysel10 "ysel_`stub'"

		* Weighted percentile (100 quantiles) within birth year, common-survey sample
		egen `rname10' = xtile(`var') if common_survey_sample==1 & inrange(dob,1910,1979), ///
			by(dob) nq(100) weight(wgt_sex_race_common_survey)
		* No ranks for birth years with fewer than 100 sample observations
		replace `rname10' =. if N_byDOB_common<100

		* ysel holds the birth year wherever a rank exists
		quietly generate `ysel10' = .
		replace `ysel10' = dob if `rname10'!=.

	}

* No rank may exist outside the common-survey sample
	assert rank_son_baseline_com_surv==. if common_survey_sample==. 
	assert rank_father_baseline_com_surv==. if common_survey_sample==. 

	label var rank_son_baseline_com_surv "Rank son, family income, nationally representative surveys"
	label var rank_father_baseline_com_surv "Rank father, baseline but nationally representative surveys"
	
*----------------------------------*
*----------------------------------*
 
 /* Variation 9: Ranking various income measures on the sample 
 				 with available father income by region

    Creates, for each input variable, a rank_<stub> variable (weighted
    percentile within birth year on region_sample, weight
    wgt_sex_race_region) and a ysel_<stub> variable equal to dob wherever
    the rank exists.

    Fix: the ysel name for log_father_byrace_interp_CWfix used to be set
    to the rank name (rank_father_byrace_regsamp), so adding that variable
    to the varlist would have failed with an already-defined error. Both
    names are now derived from one stub, so they cannot drift apart. */

* Number of region-sample observations in each birth year
	bysort dob: egen N_byDOB_regionsample = sum(region_sample) 

	foreach var of varlist fam_inc_real father_income_baseline log_father_interpolated_CWfix  log_father_byregion_interp  { 

		* Stub shared by the rank and ysel variables built from this input.
		* The last two mappings are not reached with the current varlist;
		* they are kept so those measures can be added without further edits.
		if "`var'" == "fam_inc_real" {
			local stub "son_regionsamp"
		}
		else if "`var'" == "father_income_baseline" {
			local stub "father_baseline_regsamp"
		}
		else if "`var'" == "log_father_interpolated_CWfix" {
			local stub "father_southCW_regsamp"
		}
		else if "`var'" == "log_father_byregion_interp" {
			local stub "father_byregion"
		}
		else if "`var'" == "log_father_byocc_interp_CWfix" {
			local stub "father_byocc_regsamp"
		}
		else if "`var'" == "log_father_byrace_interp_CWfix" {
			local stub "father_byrace_regsamp"
		}
		local rname3 "rank_`stub'"
		local ysel3 "ysel_`stub'"

		* Weighted percentile (100 quantiles) within birth year, region sample
		egen `rname3' = xtile(`var') if inrange(dob,1910,1979) & region_sample==1, by(dob) nq(100) weight(wgt_sex_race_region)
		* No ranks for birth years with fewer than 100 sample observations
		replace `rname3' =. if N_byDOB_regionsample<100

		* ysel holds the birth year wherever a rank exists
		qui:gen `ysel3'=.
		replace `ysel3'= dob if `rname3'!=.

	}

* A rank must never exist where the ranked variable is missing
	assert rank_son_regionsamp==. if fam_inc_real==. 
	assert rank_father_baseline_regsamp==. if father_income_baseline==. 
	assert rank_father_southCW_regsamp==. if log_father_interpolated_CWfix==. 
	assert rank_father_byregion==. if log_father_byregion_interp==. 

	label var rank_son_regionsamp "Rank son, family income, region sample"
	label var rank_father_baseline_regsamp "Rank father, baseline income, region sample"
	label var rank_father_byregion "Rank father, by race by region, region sample, interpolated"
	label var rank_father_southCW_regsamp "Rank father, occ x race x south, CW fix, region sample"

*----------------------------------*
*----------------------------------*
 
 /* Variation 10: Rank total family income and father predicted 
 			      income on sample that excludes the National
 			      Fertility Study */

* Number of non-NFS-sample observations in each birth year
	bysort dob: egen N_byDOB_nonnfssample = sum(non_nfs_sample) 

	foreach var of varlist fam_inc_real father_income_baseline { 

		* Stub shared by the rank and ysel variables built from this input
		if "`var'" == "fam_inc_real" {
			local stub "son_baseline_nonnfs"
		}
		else if "`var'" == "father_income_baseline" {
			local stub "father_baseline_nonnfs"
		}
		local rname0 "rank_`stub'"
		local ysel0 "ysel_`stub'"

		* Weighted percentile (100 quantiles) within birth year, non-NFS sample
		egen `rname0' = xtile(`var') if non_nfs_sample==1 & inrange(dob,1910,1979), ///
			by(dob) nq(100) weight(wgt_sex_race_non_nfs)
		* No ranks for birth years with fewer than 100 sample observations
		replace `rname0' =. if N_byDOB_nonnfssample<100

		* ysel holds the birth year wherever a rank exists
		quietly generate `ysel0' = .
		replace `ysel0' = dob if `rname0'!=.

	}

* A rank must never exist where the ranked variable is missing
	assert rank_son_baseline_nonnfs==. if fam_inc_real==. 
	assert rank_father_baseline_nonnfs==. if father_income_baseline==. 

	label var rank_son_baseline_nonnfs "Rank son, family income, baseline on non-nfs sample"
	label var rank_father_baseline_nonnfs "Rank father, baseline on non-nfs sample"
	
*----------------------------------*
*----------------------------------*
 
 /* Variation 11: Rank total family income and father predicted 
 			      income on sample of surveys where sampling occurs
 			      at the household level */

* Number of household-based-sample observations in each birth year
	bysort dob: egen N_byDOB_hhbasedsample = sum(hh_based_sample) 

	foreach var of varlist fam_inc_real father_income_baseline { 

		* Stub shared by the rank and ysel variables built from this input
		if "`var'" == "fam_inc_real" {
			local stub "son_baseline_hhbased"
		}
		else if "`var'" == "father_income_baseline" {
			local stub "father_baseline_hhbased"
		}
		local rname0 "rank_`stub'"
		local ysel0 "ysel_`stub'"

		* Weighted percentile (100 quantiles) within birth year, household-based sample
		egen `rname0' = xtile(`var') if hh_based_sample==1 & inrange(dob,1910,1979), ///
			by(dob) nq(100) weight(wgt_sex_race_hh_based)
		* No ranks for birth years with fewer than 100 sample observations
		replace `rname0' =. if N_byDOB_hhbasedsample<100

		* ysel holds the birth year wherever a rank exists
		quietly generate `ysel0' = .
		replace `ysel0' = dob if `rname0'!=.

	}

* A rank must never exist where the ranked variable is missing
	assert rank_son_baseline_hhbased==. if fam_inc_real==. 
	assert rank_father_baseline_hhbased==. if father_income_baseline==. 

	label var rank_son_baseline_hhbased "Rank son, family income, baseline on HH-based sample"
	label var rank_father_baseline_hhbased "Rank father, baseline on HH-based sample"

*----------------------------------*
*----------------------------------*
 
 /* Variation 12: Rank total family income and father predicted 
 				  income on sample that excludes 3 most heavily
 				  top-coded datasets */

* Number of non-top-coded-sample observations in each birth year
	bysort dob: egen N_byDOB_nontopcodsample = sum(non_topcod_sample) 

	foreach var of varlist fam_inc_real father_income_baseline { 

		* Stub shared by the rank and ysel variables built from this input
		if "`var'" == "fam_inc_real" {
			local stub "son_baseline_nontopcod"
		}
		else if "`var'" == "father_income_baseline" {
			local stub "father_baseline_nontopcod"
		}
		local rname0 "rank_`stub'"
		local ysel0 "ysel_`stub'"

		* Weighted percentile (100 quantiles) within birth year, non-top-coded sample
		egen `rname0' = xtile(`var') if non_topcod_sample==1 & inrange(dob,1910,1979), ///
			by(dob) nq(100) weight(wgt_sex_race_non_topcod)
		* No ranks for birth years with fewer than 100 sample observations
		replace `rname0' =. if N_byDOB_nontopcodsample<100

		* ysel holds the birth year wherever a rank exists
		quietly generate `ysel0' = .
		replace `ysel0' = dob if `rname0'!=.

	}

* A rank must never exist where the ranked variable is missing
	assert rank_son_baseline_nontopcod==. if fam_inc_real==. 
	assert rank_father_baseline_nontopcod==. if father_income_baseline==. 

	label var rank_son_baseline_nontopcod "Rank son, family income, baseline on least top-coded sample"
	label var rank_father_baseline_nontopcod "Rank father, baseline on least top-coded sample"

*----------------------------------*
*----------------------------------*
 
 /* Variation 13: Create ranked south-only, race-only, 
                  race x south and occ x south father income
                  measures */

	foreach var of varlist log_father_byoccsouth_interp log_father_just_race_interp log_father_just_south_interp log_father_just_rs_interp { 

		* Stub shared by the rank and ysel variables built from this input
		if "`var'" == "log_father_byoccsouth_interp" {
			local stub "father_byoccsouth"
		}
		else if "`var'" == "log_father_just_race_interp" {
			local stub "father_justrace"
		}
		else if "`var'" == "log_father_just_south_interp" {
			local stub "father_justsouth"
		}
		else if "`var'" == "log_father_just_rs_interp" {
			local stub "father_just_rs"
		}
		local rname0 "rank_`stub'"
		local ysel0 "ysel_`stub'"

		* Weighted percentile (100 quantiles) within birth year, baseline sample
		egen `rname0' = xtile(`var') if baseline_sample==1 & inrange(dob,1910,1979), ///
			by(dob) nq(100) weight(wgt_sex_race)
		* No ranks for birth years with fewer than 100 baseline-sample observations
		replace `rname0' =. if N_byDOB_baselinesample<100

		* ysel holds the birth year wherever a rank exists
		quietly generate `ysel0' = .
		replace `ysel0' = dob if `rname0'!=.

	}

* A rank must never exist where the ranked variable is missing
	assert rank_father_byoccsouth==. if log_father_byoccsouth_interp==. 
	assert rank_father_justrace==. if log_father_just_race_interp==. 
	assert rank_father_justsouth==. if log_father_just_south_interp==. 
	assert rank_father_just_rs==. if log_father_just_rs_interp==. 

	label var rank_father_byoccsouth "Rank father, occ x south"
	label var rank_father_justrace "Rank father, just race"
	label var rank_father_justsouth "Rank father, just south"
	label var rank_father_just_rs "Rank father, race x south"

*------------------------------------------------------------------------------*
*------------------------------------------------------------------------------*

*************************
*** FINAL VARIABLES FOR ANALYSIS
*************************

* Sample indicators 
	/* Each entry lists: sample flag, son rank, father/parent rank.
	   The flag is set to missing for any observation where either of 
	   its two ranks is missing. */
	foreach spec in ///
		"baseline_sample rank_son_baseline rank_father_baseline" ///
		"edu_sample rank_son_edusample rank_father_byr_bys_edu" ///
		"nonfarm_sample rank_son_baseline_nonfarm rank_father_baseline_nonfarm" ///
		"zeros_sample rank_son_zeros_parent rank_parent_zeros" ///
		"equivalized_sample rank_son_equivalized rank_father_equivalized" ///
		"region_sample rank_son_regionsamp rank_father_byregion" ///
		"parent_sample rank_son_parent rank_parent" ///
		"parent_all_sample rank_son_parent_all rank_parent_all" ///
		"father_notworking_sample rank_son_father_notworking rank_father_notworking" ///
		"common_survey_sample rank_son_baseline_com_surv rank_father_baseline_com_surv" ///
		"non_nfs_sample rank_son_baseline_nonnfs rank_father_baseline_nonnfs" ///
		"hh_based_sample rank_son_baseline_hhbased rank_father_baseline_hhbased" ///
		"non_topcod_sample rank_son_baseline_nontopcod rank_father_baseline_nontopcod" {
		local flag   : word 1 of `spec'
		local rk_son : word 2 of `spec'
		local rk_par : word 3 of `spec'
		replace `flag'=. if `rk_son'==. | `rk_par'==.
	}
	
* Indicators for missing family or father income
	/* Each flag equals 1 when the income measure is missing and 
	   incdata==1, and 0 otherwise. */
	gen fam_inc_missing = fam_inc_real==. & incdata==1
	label var fam_inc_missing "Family income missing within income data"
	
	* Label corrected: this flag is built from father_income_baseline,
	* not from family income.
	gen father_inc_missing = father_income_baseline==. & incdata==1
	label var father_inc_missing "Father income missing within income data"

* Create survey-year fixed effects
	/* Grouping uses the dataset names as they stand here, i.e. before 
	   the PSID cross-sections are pooled under one name below. */
	egen survey_year=group(data year)
	label var survey_year "Survey-year fixed effects"
	
/* Pool the two PSID cross-sections under a single dataset name 
   so that they are treated as 1 dataset */
	replace data="psid" if inlist(data,"psid1997","psid2017")
		
* Label helpful variables
	/* Note: Stata variable labels hold at most 80 characters, so every 
	         label below is worded to fit within that limit. */

	* Respondent and father characteristics
	label var occRej "Respondent occupation, coarsened"
	label var age "Respondent age"
	label var fatheroccej "Father occupation, coarsened"
	label var sex "Respondent sex"
	label var agesq "Respondent age squared"
	label var foreignborn "Respondent foreignborn"
	label var fatherforeign "Father foreign born"
	label var employed "Respondent is employed"
	label var selfemployed "Respondent is selfemployed"
	label var eduR "Respondent education, binned"
	label var moved_state "Respondent moved state"
	label var hs_ed "Respondent is HS educated"
	label var coll_ed "Respondent is college educated"
	label var yrsschool_dad "Father years of school"
	label var fatherfarm "Father works in farming occupation"
	label var data "Dataset"	
	label var white_male "Respondent is white male"
	label var white_female "Respondent is white female"
	label var nonw_male "Respondent is non-white male"
	label var nonw_female "Respondent is non-white female"

	* Population-adjusted weights, one per analysis sample
	label var wgt_sex_race "Population adj. weight to account for gender and race ratios (baseline sample)"
	label var wgt_sex_race_nonfarm "Pop. adj. weight to account for gender and race ratios (nonfarm sample)"
	label var wgt_sex_race_edu "Pop. adj. weight to account for gender and race ratios (father edu. sample)"
	label var wgt_sex_race_father_notworking "Pop. adj. weight (plus unemployed dads)"
	label var wgt_sex_race_parent "Pop. adj. weight to account for gender and race ratios (with moms)"
	label var wgt_sex_race_parent_all "Pop. adj. weight (mom and unemployed dads sample)"
	label var wgt_sex_race_zeros "Pop. adj. weight to account for gender and race ratios (income datasets)"
	label var wgt_sex_race_equivalized "Pop. adj. weight to account for gender and race ratios (datasets w/ HH size)"
	* Shortened from 85 characters so the label is not cut off at 80
	label var wgt_sex_race_region "Pop. adj. weight to account for gender and race ratios (datasets w/ region)"
	label var wgt_sex_race_common_survey "Pop. adj. weight to account for gender and race ratios (nat. rep. datasets)"
	* Shortened from 90 characters so the label is not cut off at 80
	label var wgt_sex_race_sameshares "Pop. adj. weight w/ gender and race ratios fixed over time (baseline sample)"
	label var wgt_sex_race_non_nfs "Pop. adj. weight excluding NFS respondents from sample"
	label var wgt_sex_race_hh_based "Pop. adj. weight excluding non-HH-based surveys from sample"	
	label var wgt_sex_race_non_topcod "Pop. adj. weight excluding most top-coded datasets from sample"

	* Birthplace and marital status
	label var bornsouth "Respondent born in South"
	label var never_married "Respondent never married"
	label var widowed "Respondent is widowed"
	label var divorced "Respondent is divorced"
	label var separated "Respondent is separated"

	* Sample indicators
	label var edu_sample "Obs has non-missing family income and race by edu income score for father"
	label var baseline_sample "Obs has non-missing family income and baseline income score"
	label var zeros_sample "Obs in datasets with father missings replaced with 0s"
	label var nonfarm_sample "Individuals in baseline sample minus those with fathers in ag. occs."	
	label var equivalized_sample "Individuals in baseline sample minus those with HH size unavailable"	
	label var parent_sample "Obs that have mother occ. available if father occ. is missing"
	* "have" restored to match the wording of the neighbouring sample labels
	label var parent_all_sample "Obs that have mother occ. or unemployed dads if father occ. missing"
	label var father_notworking_sample "Obs that have unemployed dads if father occ. missing"
	label var common_survey_sample "Obs in nationally representative surveys"
	label var region_sample "Obs in surveys with region available"
	label var non_nfs_sample "Obs not belonging to National Fertility Study"
	label var hh_based_sample "Obs belonging to HH-based surveys"
	label var non_topcod_sample "Obs not belonging to 3 most heavily top-coded datasets"

	* Other derived variables and decade dummies
	label var age10 "Year in which respondent turns 10"
	label var exp_interpolated_income "Exponential of log interpolated income"		
	label var decade_1 "Binary: 1910 decade"
	label var decade_2 "Binary: 1920 decade" 
	label var decade_3 "Binary: 1930 decade"
	label var decade_4 "Binary: 1940 decade" 
	label var decade_5 "Binary: 1950 decade"
	label var decade_6 "Binary: 1960 decade"
	label var decade_7 "Binary: 1970 decade"
	
	* Drop the listed variables and wildcard groups so they are not
	* carried into the saved analysis file
	drop famid samp_id N_byDOB*  ysel* lnfaminc R_* headofhh_* 

******************
*** SAVE 
******************

* Shrink storage types where possible, then write the analysis dataset
* (overwriting any existing copy)
compress
save "./3_Output/2_PooledData_analysis.dta", replace
